From 80006dbee674f9fa7c20a799d208657a18b5dabf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:05:35 +0900 Subject: kprobes/x86: Remove jprobe implementation Remove arch dependent setjump/longjump functions and unused fields in kprobe_ctlblk for jprobes from arch/x86. Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/lkml/152942433578.15209.14034551799624757792.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 3 -- arch/x86/kernel/kprobes/core.c | 96 ++---------------------------------------- 2 files changed, 3 insertions(+), 96 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 367d99cff426..06782c2efa04 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -111,9 +111,6 @@ struct kprobe_ctlblk { unsigned long kprobe_status; unsigned long kprobe_old_flags; unsigned long kprobe_saved_flags; - unsigned long *jprobe_saved_sp; - struct pt_regs jprobe_saved_regs; - kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; struct prev_kprobe prev_kprobe; }; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6f4d42377fe5..1b2d1acba810 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -66,8 +66,6 @@ #include "common.h" -void jprobe_return_end(void); - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -690,10 +688,9 @@ int kprobe_int3_handler(struct pt_regs *regs) /* * If we have no pre-handler or it returned 0, we * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry - * for jprobe processing, so get out doing nothing - * more here. + * pre-handler and it returned non-zero, that means + * user handler setup registers to exit to another + * instruction, we must skip the single stepping. */ if (!p->pre_handler || !p->pre_handler(p, regs)) setup_singlestep(p, regs, kcb, 0); @@ -1083,93 +1080,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, } NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_sp = stack_addr(regs); - addr = (unsigned long)(kcb->jprobe_saved_sp); - - /* - * As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. - * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy - * raw stack chunk with redzones: - */ - __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->ip = (unsigned long)(jp->entry); - - /* - * jprobes use jprobe_return() which skips the normal return - * path of the function, and this messes up the accounting of the - * function graph tracer to get messed up. - * - * Pause function graph tracing while performing the jprobe function. - */ - pause_graph_tracing(); - return 1; -} -NOKPROBE_SYMBOL(setjmp_pre_handler); - -void jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - /* Unpoison stack redzones in the frames we are going to jump over. */ - kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp); - - asm volatile ( -#ifdef CONFIG_X86_64 - " xchg %%rbx,%%rsp \n" -#else - " xchgl %%ebx,%%esp \n" -#endif - " int3 \n" - " .globl jprobe_return_end\n" - " jprobe_return_end: \n" - " nop \n"::"b" - (kcb->jprobe_saved_sp):"memory"); -} -NOKPROBE_SYMBOL(jprobe_return); -NOKPROBE_SYMBOL(jprobe_return_end); - -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->ip - 1); - struct jprobe *jp = container_of(p, struct jprobe, kp); - void *saved_sp = kcb->jprobe_saved_sp; - - if ((addr > (u8 *) jprobe_return) && - (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != saved_sp) { - struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; - printk(KERN_ERR - "current sp %p does not match saved sp %p\n", - stack_addr(regs), saved_sp); - printk(KERN_ERR "Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); - printk(KERN_ERR "Current registers\n"); - show_regs(regs); - BUG(); - } - /* It's OK to start function graph tracing again */ - unpause_graph_tracing(); - *regs = kcb->jprobe_saved_regs; - __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} -NOKPROBE_SYMBOL(longjmp_break_handler); - bool arch_within_kprobe_blacklist(unsigned long addr) { bool is_in_entry_trampoline_section = false; -- cgit v1.2.3 From e704e34cd0bbd1c69eb4ca724935a23f6440502e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:10:55 +0900 Subject: kprobes/x86: Don't call the ->break_handler() in x86 kprobes Don't call the ->break_handler() and remove break_handler related code from x86 since that was only used by jprobe which got removed. Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/lkml/152942465549.15209.15889693025972771135.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 2 +- arch/x86/kernel/kprobes/common.h | 10 ---------- arch/x86/kernel/kprobes/core.c | 7 ------- arch/x86/kernel/kprobes/ftrace.c | 42 +++++++++++----------------------------- 4 files changed, 12 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 06782c2efa04..c8cec1b39b88 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -78,7 +78,7 @@ struct arch_specific_insn { * boostable = true: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's - * a post_handler or break_handler). + * a post_handler). */ bool boostable; bool if_modifier; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index ae38dccf0c8f..2b949f4fd4d8 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -105,14 +105,4 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig } #endif -#ifdef CONFIG_KPROBES_ON_FTRACE -extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb); -#else -static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - return 0; -} -#endif #endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 1b2d1acba810..0ac16a0d93e5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -709,13 +709,6 @@ int kprobe_int3_handler(struct pt_regs *regs) regs->ip = (unsigned long)addr; preempt_enable_no_resched(); return 1; - } else if (kprobe_running()) { - p = __this_cpu_read(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - if (!skip_singlestep(p, regs, kcb)) - setup_singlestep(p, regs, kcb, 0); - return 1; - } } /* else: not a kprobe fault; let the kernel handle it */ preempt_enable_no_resched(); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 8dc0161cec8f..02a6dd1b6bd0 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,36 +25,6 @@ #include "common.h" -static nokprobe_inline -void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb, unsigned long orig_ip) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); - if (orig_ip) - regs->ip = orig_ip; -} - -int skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - if (kprobe_ftrace(p)) { - __skip_singlestep(p, regs, kcb, 0); - preempt_enable_no_resched(); - return 1; - } - return 0; -} -NOKPROBE_SYMBOL(skip_singlestep); - /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) @@ -80,7 +50,17 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (!p->pre_handler || !p->pre_handler(p, regs)) { - __skip_singlestep(p, regs, kcb, orig_ip); + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + regs->ip = orig_ip; + __this_cpu_write(current_kprobe, NULL); preempt_enable_no_resched(); } /* -- cgit v1.2.3 From cce188bd58cfbd603b904dbce75f34de2eff959a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:15:45 +0900 Subject: bpf/error-inject/kprobes: Clear current_kprobe and enable preempt in kprobe Clear current_kprobe and enable preemption in kprobe even if pre_handler returns !0. This simplifies function override using kprobes. Jprobe used to require to keep the preemption disabled and keep current_kprobe until it returned to original function entry. For this reason kprobe_int3_handler() and similar arch dependent kprobe handers checks pre_handler result and exit without enabling preemption if the result is !0. After removing the jprobe, Kprobes does not need to keep preempt disabled even if user handler returns !0 anymore. But since the function override handler in error-inject and bpf is also returns !0 if it overrides a function, to balancing the preempt count, it enables preemption and reset current kprobe by itself. That is a bad design that is very buggy. This fixes such unbalanced preempt-count and current_kprobes setting in kprobes, bpf and error-inject. Note: for powerpc and x86, this removes all preempt_disable from kprobe_ftrace_handler because ftrace callbacks are called under preempt disabled. Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Acked-by: Naveen N. Rao Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: David S. Miller Cc: Fenghua Yu Cc: Heiko Carstens Cc: James Hogan Cc: Josef Bacik Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Russell King Cc: Steven Rostedt Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Link: https://lore.kernel.org/lkml/152942494574.15209.12323837825873032258.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 4 ++++ arch/x86/kernel/kprobes/ftrace.c | 9 +++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0ac16a0d93e5..814e26b7c8a2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -694,6 +694,10 @@ int kprobe_int3_handler(struct pt_regs *regs) */ if (!p->pre_handler || !p->pre_handler(p, regs)) setup_singlestep(p, regs, kcb, 0); + else { + reset_current_kprobe(); + preempt_enable_no_resched(); + } return 1; } } else if (*addr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 02a6dd1b6bd0..ef819e19650b 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -45,8 +45,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); - /* To emulate trap based kprobes, preempt_disable here */ - preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (!p->pre_handler || !p->pre_handler(p, regs)) { @@ -60,13 +58,12 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, p->post_handler(p, regs, 0); } regs->ip = orig_ip; - __this_cpu_write(current_kprobe, NULL); - preempt_enable_no_resched(); } /* - * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe, and keep preempt count +1. + * If pre_handler returns !0, it changes regs->ip. We have to + * skip emulating post_handler. */ + __this_cpu_write(current_kprobe, NULL); } } NOKPROBE_SYMBOL(kprobe_ftrace_handler); -- cgit v1.2.3 From 2bbda764d720aacabaad38374d26fcccb7843f17 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:16:17 +0900 Subject: kprobes/x86: Do not disable preempt on int3 path Since int3 and debug exception(for singlestep) are run with IRQ disabled and while running single stepping we drop IF from regs->flags, that path must not be preemptible. So we can remove the preempt disable/enable calls from that path. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/lkml/152942497779.15209.2879580696589868291.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 18 ++++-------------- arch/x86/kernel/kprobes/opt.c | 1 - 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 814e26b7c8a2..f7104b256de7 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -594,7 +594,6 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, * stepping. */ regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); return; } #endif @@ -667,12 +666,10 @@ int kprobe_int3_handler(struct pt_regs *regs) addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* - * We don't want to be preempted for the entire - * duration of kprobe processing. We conditionally - * re-enable preemption at the end of this function, - * and also in reenter_kprobe() and setup_singlestep(). + * We don't want to be preempted for the entire duration of kprobe + * processing. Since int3 and debug trap disables irqs and we clear + * IF while singlestepping, it must be no preemptible. */ - preempt_disable(); kcb = get_kprobe_ctlblk(); p = get_kprobe(addr); @@ -694,10 +691,8 @@ int kprobe_int3_handler(struct pt_regs *regs) */ if (!p->pre_handler || !p->pre_handler(p, regs)) setup_singlestep(p, regs, kcb, 0); - else { + else reset_current_kprobe(); - preempt_enable_no_resched(); - } return 1; } } else if (*addr != BREAKPOINT_INSTRUCTION) { @@ -711,11 +706,9 @@ int kprobe_int3_handler(struct pt_regs *regs) * the original instruction. */ regs->ip = (unsigned long)addr; - preempt_enable_no_resched(); return 1; } /* else: not a kprobe fault; let the kernel handle it */ - preempt_enable_no_resched(); return 0; } NOKPROBE_SYMBOL(kprobe_int3_handler); @@ -966,8 +959,6 @@ int kprobe_debug_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); - /* * if somebody else is singlestepping across a probe point, flags * will have TF set, in which case, continue the remaining processing @@ -1014,7 +1005,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE || kcb->kprobe_status == KPROBE_HIT_SSDONE) { /* diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 203d398802a3..eaf02f2e7300 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -491,7 +491,6 @@ int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; if (!reenter) reset_current_kprobe(); - preempt_enable_no_resched(); return 1; } return 0; -- cgit v1.2.3 From 0592e57b24e7e05ec1f4c50b9666c013abff7017 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jun 2018 08:38:45 -0700 Subject: perf/x86/intel/lbr: Fix incomplete LBR call stack LBR has a limited stack size. If a task has a deeper call stack than LBR's stack size, only the overflowed part is reported. A complete call stack may not be reconstructed by perf tool. Current code doesn't access all LBR registers. It only read the ones below the TOS. The LBR registers above the TOS will be discarded unconditionally. When a CALL is captured, the TOS is incremented by 1 , modulo max LBR stack size. The LBR HW only records the call stack information to the register which the TOS points to. It will not touch other LBR registers. So the registers above the TOS probably still store the valid call stack information for an overflowed call stack, which need to be reported. To retrieve complete call stack information, we need to start from TOS, read all LBR registers until an invalid entry is detected. 0s can be used to detect the invalid entry, because: - When a RET is captured, the HW zeros the LBR register which TOS points to, then decreases the TOS. - The LBR registers are reset to 0 when adding a new LBR event or scheduling an existing LBR event. - A taken branch at IP 0 is not expected The context switch code is also modified to save/restore all valid LBR registers. Furthermore, the LBR registers, which don't have valid call stack information, need to be reset in restore, because they may be polluted while swapped out. Here is a small test program, tchain_deep. Its call stack is deeper than 32. noinline void f33(void) { int i; for (i = 0; i < 10000000;) { if (i%2) i++; else i++; } } noinline void f32(void) { f33(); } noinline void f31(void) { f32(); } ... ... noinline void f1(void) { f2(); } int main() { f1(); } Here is the test result on SKX. The max stack size of SKX is 32. Without the patch: $ perf record -e cycles --call-graph lbr -- ./tchain_deep $ perf report --stdio # # Children Self Command Shared Object Symbol # ........ ........ ........... ................ ................. # 100.00% 99.99% tchain_deep tchain_deep [.] f33 | --99.99%--f30 f31 f32 f33 With the patch: $ perf record -e cycles --call-graph lbr -- ./tchain_deep $ perf report --stdio # Children Self Command Shared Object Symbol # ........ ........ ........... ................ .................. # 99.99% 0.00% tchain_deep tchain_deep [.] f1 | ---f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31 f32 f33 Signed-off-by: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Stephane Eranian Cc: Vince Weaver Cc: Alexander Shishkin Cc: Thomas Gleixner Cc: acme@kernel.org Cc: eranian@google.com Link: https://lore.kernel.org/lkml/1528213126-4312-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 32 ++++++++++++++++++++++++++------ arch/x86/events/perf_event.h | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index cf372b90557e..a4170048a30b 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -346,7 +346,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) mask = x86_pmu.lbr_nr - 1; tos = task_ctx->tos; - for (i = 0; i < tos; i++) { + for (i = 0; i < task_ctx->valid_lbrs; i++) { lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); @@ -354,6 +354,15 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + + for (; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + wrlbr_from(lbr_idx, 0); + wrlbr_to(lbr_idx, 0); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0); + } + wrmsrl(x86_pmu.lbr_tos, tos); task_ctx->lbr_stack_state = LBR_NONE; } @@ -361,7 +370,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { unsigned lbr_idx, mask; - u64 tos; + u64 tos, from; int i; if (task_ctx->lbr_callstack_users == 0) { @@ -371,13 +380,17 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) mask = x86_pmu.lbr_nr - 1; tos = intel_pmu_lbr_tos(); - for (i = 0; i < tos; i++) { + for (i = 0; i < x86_pmu.lbr_nr; i++) { lbr_idx = (tos - i) & mask; - task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + from = rdlbr_from(lbr_idx); + if (!from) + break; + task_ctx->lbr_from[i] = from; task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } + task_ctx->valid_lbrs = i; task_ctx->tos = tos; task_ctx->lbr_stack_state = LBR_VALID; } @@ -531,7 +544,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) */ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { - bool need_info = false; + bool need_info = false, call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); @@ -542,7 +555,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (cpuc->lbr_sel) { need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); if (cpuc->lbr_sel->config & LBR_CALL_STACK) - num = tos; + call_stack = true; } for (i = 0; i < num; i++) { @@ -555,6 +568,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) from = rdlbr_from(lbr_idx); to = rdlbr_to(lbr_idx); + /* + * Read LBR call stack entries + * until invalid entry (0s) is detected. + */ + if (call_stack && !from) + break; + if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9f3711470ec1..6b72a92069fd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -648,6 +648,7 @@ struct x86_perf_task_context { u64 lbr_to[MAX_LBR_ENTRIES]; u64 lbr_info[MAX_LBR_ENTRIES]; int tos; + int valid_lbrs; int lbr_callstack_users; int lbr_stack_state; }; -- cgit v1.2.3 From 8b077e4a69bef5c4121426e99497975860191e53 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jun 2018 08:38:46 -0700 Subject: perf/x86/intel/lbr: Optimize context switches for the LBR call stack Context switches with perf LBR call stack context are fairly expensive because they do a lot of MSR writes. Currently we unconditionally do the expensive operation when LBR call stack is enabled. It's not necessary for some common cases, e.g task -> other kernel thread -> same task. The LBR registers are not changed, hence they don't need to be rewritten/restored. Introduce per-CPU variables to track the last LBR call stack context. If the same context is scheduled in, the rewrite/restore is not required, with the following two exceptions: - The LBR registers may be modified by a normal LBR event, i.e., adding a new LBR event or scheduling an existing LBR event. In both cases, the LBR registers are reset first. The last LBR call stack information is cleared in intel_pmu_lbr_reset(). Restoring the LBR registers is required. - The LBR registers are initialized to zero in C6. If the LBR registers which TOS points is cleared, C6 must be entered while swapped out. Restoring the LBR registers is required as well. These exceptions are not common. Signed-off-by: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Stephane Eranian Cc: Vince Weaver Cc: Alexander Shishkin Cc: acme@kernel.org Cc: eranian@google.com Link: https://lore.kernel.org/lkml/1528213126-4312-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 24 +++++++++++++++++++++++- arch/x86/events/perf_event.h | 4 ++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index a4170048a30b..f3e006bed9a7 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -216,6 +216,8 @@ static void intel_pmu_lbr_reset_64(void) void intel_pmu_lbr_reset(void) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (!x86_pmu.lbr_nr) return; @@ -223,6 +225,9 @@ void intel_pmu_lbr_reset(void) intel_pmu_lbr_reset_32(); else intel_pmu_lbr_reset_64(); + + cpuc->last_task_ctx = NULL; + cpuc->last_log_id = 0; } /* @@ -334,6 +339,7 @@ static inline u64 rdlbr_to(unsigned int idx) static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; unsigned lbr_idx, mask; u64 tos; @@ -344,8 +350,20 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) return; } - mask = x86_pmu.lbr_nr - 1; tos = task_ctx->tos; + /* + * Does not restore the LBR registers, if + * - No one else touched them, and + * - Did not enter C6 + */ + if ((task_ctx == cpuc->last_task_ctx) && + (task_ctx->log_id == cpuc->last_log_id) && + rdlbr_from(tos)) { + task_ctx->lbr_stack_state = LBR_NONE; + return; + } + + mask = x86_pmu.lbr_nr - 1; for (i = 0; i < task_ctx->valid_lbrs; i++) { lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); @@ -369,6 +387,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned lbr_idx, mask; u64 tos, from; int i; @@ -393,6 +412,9 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) task_ctx->valid_lbrs = i; task_ctx->tos = tos; task_ctx->lbr_stack_state = LBR_VALID; + + cpuc->last_task_ctx = task_ctx; + cpuc->last_log_id = ++task_ctx->log_id; } void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 6b72a92069fd..2430398befd8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -163,6 +163,7 @@ struct intel_excl_cntrs { unsigned core_id; /* per-core: core id */ }; +struct x86_perf_task_context; #define MAX_LBR_ENTRIES 32 enum { @@ -214,6 +215,8 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel; u64 br_sel; + struct x86_perf_task_context *last_task_ctx; + int last_log_id; /* * Intel host/guest exclude bits @@ -651,6 +654,7 @@ struct x86_perf_task_context { int valid_lbrs; int lbr_callstack_users; int lbr_stack_state; + int log_id; }; #define x86_add_quirk(func_) \ -- cgit v1.2.3 From 0ea063306eecf300fcf06d2f5917474b580f666f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Apr 2018 21:37:03 +0900 Subject: kprobes/x86: Fix %p uses in error messages Remove all %p uses in error messages in kprobes/x86. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Arnd Bergmann Cc: David Howells Cc: David S . Miller Cc: Heiko Carstens Cc: Jon Medhurst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Richter Cc: Tobin C . Harding Cc: Will Deacon Cc: acme@kernel.org Cc: akpm@linux-foundation.org Cc: brueckner@linux.vnet.ibm.com Cc: linux-arch@vger.kernel.org Cc: rostedt@goodmis.org Cc: schwidefsky@de.ibm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/152491902310.9916.13355297638917767319.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f7104b256de7..b0d1e81c96bb 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -393,8 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) - (u8 *) real; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); - pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, real, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -637,8 +635,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, * Raise a BUG or we'll continue in an endless reentering loop * and eventually a stack overflow. */ - printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", - p->addr); + pr_err("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); default: -- cgit v1.2.3 From 8e983ff9ac02a8fb454ed09c2462bdb3617006a8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Jun 2018 04:58:49 +0200 Subject: perf/hw_breakpoint: Pass arch breakpoint struct to arch_check_bp_in_kernelspace() We can't pass the breakpoint directly on arch_check_bp_in_kernelspace() anymore because its architecture internal datas (struct arch_hw_breakpoint) are not yet filled by the time we call the function, and most implementation need this backend to be up to date. So arrange the function to take the probing struct instead. Signed-off-by: Frederic Weisbecker Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Zankel Cc: Jiri Olsa Cc: Joel Fernandes Cc: Linus Torvalds Cc: Mark Rutland Cc: Max Filippov Cc: Michael Ellerman Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/1529981939-8231-3-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_breakpoint.h | 2 +- arch/x86/kernel/hw_breakpoint.c | 71 +++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index f59c39835a5a..78924596f51f 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -52,7 +52,7 @@ static inline int hw_breakpoint_slots(int type) struct perf_event; struct pmu; -extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 8771766d46b6..c43379188f4f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -169,28 +169,29 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) set_dr_addr_mask(0, i); } -/* - * Check for virtual address in kernel space. - */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) +static int arch_bp_generic_len(int x86_len) { - unsigned int len; - unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - - va = info->address; - len = bp->attr.bp_len; - - /* - * We don't need to worry about va + len - 1 overflowing: - * we already require that va is aligned to a multiple of len. - */ - return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX); + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + return HW_BREAKPOINT_LEN_1; + case X86_BREAKPOINT_LEN_2: + return HW_BREAKPOINT_LEN_2; + case X86_BREAKPOINT_LEN_4: + return HW_BREAKPOINT_LEN_4; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + return HW_BREAKPOINT_LEN_8; +#endif + default: + return -EINVAL; + } } int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { + int len; + /* Type */ switch (x86_type) { case X86_BREAKPOINT_EXECUTE: @@ -211,28 +212,32 @@ int arch_bp_generic_fields(int x86_len, int x86_type, } /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_1: - *gen_len = HW_BREAKPOINT_LEN_1; - break; - case X86_BREAKPOINT_LEN_2: - *gen_len = HW_BREAKPOINT_LEN_2; - break; - case X86_BREAKPOINT_LEN_4: - *gen_len = HW_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - *gen_len = HW_BREAKPOINT_LEN_8; - break; -#endif - default: + len = arch_bp_generic_len(x86_len); + if (len < 0) return -EINVAL; - } + *gen_len = len; return 0; } +/* + * Check for virtual address in kernel space. + */ +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) +{ + unsigned long va; + int len; + + va = hw->address; + len = arch_bp_generic_len(hw->len); + WARN_ON_ONCE(len < 0); + + /* + * We don't need to worry about va + len - 1 overflowing: + * we already require that va is aligned to a multiple of len. + */ + return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX); +} static int arch_build_bp_info(struct perf_event *bp) { -- cgit v1.2.3 From a0baf043c5cfa3a489a63ac50f5201c31a651e21 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Jun 2018 04:58:50 +0200 Subject: perf/arch/x86: Implement hw_breakpoint_arch_parse() Migrate to the new API in order to remove arch_validate_hwbkpt_settings() that clumsily mixes up architecture validation and commit. Original-patch-by: Andy Lutomirski Signed-off-by: Frederic Weisbecker Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Zankel Cc: Jiri Olsa Cc: Joel Fernandes Cc: Linus Torvalds Cc: Mark Rutland Cc: Max Filippov Cc: Michael Ellerman Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/1529981939-8231-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_breakpoint.h | 6 +++- arch/x86/kernel/hw_breakpoint.c | 60 ++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 78924596f51f..6c88e8e22678 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -49,11 +49,15 @@ static inline int hw_breakpoint_slots(int type) return HBP_NUM; } +struct perf_event_attr; struct perf_event; struct pmu; extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw); +#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index c43379188f4f..34a5c1715148 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -239,19 +239,20 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX); } -static int arch_build_bp_info(struct perf_event *bp) +static int arch_build_bp_info(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - - info->address = bp->attr.bp_addr; + hw->address = attr->bp_addr; + hw->mask = 0; /* Type */ - switch (bp->attr.bp_type) { + switch (attr->bp_type) { case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; + hw->type = X86_BREAKPOINT_WRITE; break; case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; + hw->type = X86_BREAKPOINT_RW; break; case HW_BREAKPOINT_X: /* @@ -259,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp) * acceptable for kprobes. On non-kprobes kernels, we don't * allow kernel breakpoints at all. */ - if (bp->attr.bp_addr >= TASK_SIZE_MAX) { + if (attr->bp_addr >= TASK_SIZE_MAX) { #ifdef CONFIG_KPROBES - if (within_kprobe_blacklist(bp->attr.bp_addr)) + if (within_kprobe_blacklist(attr->bp_addr)) return -EINVAL; #else return -EINVAL; #endif } - info->type = X86_BREAKPOINT_EXECUTE; + hw->type = X86_BREAKPOINT_EXECUTE; /* * x86 inst breakpoints need to have a specific undefined len. * But we still need to check userspace is not trying to setup * an unsupported length, to get a range breakpoint for example. */ - if (bp->attr.bp_len == sizeof(long)) { - info->len = X86_BREAKPOINT_LEN_X; + if (attr->bp_len == sizeof(long)) { + hw->len = X86_BREAKPOINT_LEN_X; return 0; } default: @@ -283,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp) } /* Len */ - info->mask = 0; - - switch (bp->attr.bp_len) { + switch (attr->bp_len) { case HW_BREAKPOINT_LEN_1: - info->len = X86_BREAKPOINT_LEN_1; + hw->len = X86_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - info->len = X86_BREAKPOINT_LEN_2; + hw->len = X86_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - info->len = X86_BREAKPOINT_LEN_4; + hw->len = X86_BREAKPOINT_LEN_4; break; #ifdef CONFIG_X86_64 case HW_BREAKPOINT_LEN_8: - info->len = X86_BREAKPOINT_LEN_8; + hw->len = X86_BREAKPOINT_LEN_8; break; #endif default: /* AMD range breakpoint */ - if (!is_power_of_2(bp->attr.bp_len)) + if (!is_power_of_2(attr->bp_len)) return -EINVAL; - if (bp->attr.bp_addr & (bp->attr.bp_len - 1)) + if (attr->bp_addr & (attr->bp_len - 1)) return -EINVAL; if (!boot_cpu_has(X86_FEATURE_BPEXT)) @@ -317,8 +316,8 @@ static int arch_build_bp_info(struct perf_event *bp) * breakpoints, then we'll have to check for kprobe-blacklisted * addresses anywhere in the range. */ - info->mask = bp->attr.bp_len - 1; - info->len = X86_BREAKPOINT_LEN_1; + hw->mask = attr->bp_len - 1; + hw->len = X86_BREAKPOINT_LEN_1; } return 0; @@ -327,22 +326,23 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) +int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); unsigned int align; int ret; - ret = arch_build_bp_info(bp); + ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; - switch (info->len) { + switch (hw->len) { case X86_BREAKPOINT_LEN_1: align = 0; - if (info->mask) - align = info->mask; + if (hw->mask) + align = hw->mask; break; case X86_BREAKPOINT_LEN_2: align = 1; @@ -363,7 +363,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ - if (info->address & align) + if (hw->address & align) return -EINVAL; return 0; -- cgit v1.2.3 From cffbb3bd444bc95186b6ab0ed3bf1d5bcf4aa620 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Jun 2018 04:58:57 +0200 Subject: perf/hw_breakpoint: Remove default hw_breakpoint_arch_parse() All architectures have implemented it, we can now remove the poor weak version. Signed-off-by: Frederic Weisbecker Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chris Zankel Cc: Jiri Olsa Cc: Joel Fernandes Cc: Linus Torvalds Cc: Mark Rutland Cc: Max Filippov Cc: Michael Ellerman Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/1529981939-8231-11-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_breakpoint.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 6c88e8e22678..a1f0e90d0818 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -57,7 +57,6 @@ extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); extern int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, struct arch_hw_breakpoint *hw); -#define hw_breakpoint_arch_parse hw_breakpoint_arch_parse extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); -- cgit v1.2.3 From 3196234039155a33c80e52d7aa41a29dce9a5c51 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 8 Mar 2018 18:15:39 -0800 Subject: perf/x86/intel: Introduce PMU flag for Extended PEBS The Extended PEBS feature, introduced in the Goldmont Plus microarchitecture, supports all events as "Extended PEBS". Introduce flag PMU_FL_PEBS_ALL to indicate the platforms which support extended PEBS. To support all events, it needs to support all constraints for PEBS. To avoid duplicating all the constraints in the PEBS table, making the PEBS code search the normal constraints too. Based-on-code-from: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20180309021542.11374-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 7 +++++++ arch/x86/events/perf_event.h | 1 + 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8dbba77e0518..9fd9cb1d2cc8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -871,6 +871,13 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) } } + /* + * Extended PEBS support + * Makes the PEBS code search the normal constraints. + */ + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + return NULL; + return &emptyconstraint; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2430398befd8..156286335351 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -673,6 +673,7 @@ do { \ #define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */ #define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ +#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -- cgit v1.2.3 From 4f08b6255adb1e379b4fcc8d304ec1263d465677 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 8 Mar 2018 18:15:40 -0800 Subject: perf/x86/intel: Support PEBS on fixed counters The Extended PEBS feature supports PEBS on fixed-function performance counters as well as all four general purpose counters. It has to change the order of PEBS and fixed counter enabling to make sure PEBS is enabled for the fixed counters. The change of the order doesn't impact the behavior of current code on other platforms which don't support extended PEBS. Because there is no dependency among those enable/disable functions. Don't enable IRQ generation (0x8) for MSR_ARCH_PERFMON_FIXED_CTR_CTRL. The PEBS ucode will handle the interrupt generation. Based-on-code-from: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20180309021542.11374-2-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 86f0c15dcc2d..d5a3124605f5 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2041,15 +2041,15 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_disable(event); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; } x86_pmu_disable_event(event); - - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); } static void intel_pmu_del_event(struct perf_event *event) @@ -2068,17 +2068,19 @@ static void intel_pmu_read_event(struct perf_event *event) x86_perf_event_update(event); } -static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) +static void intel_pmu_enable_fixed(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx - INTEL_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; + u64 ctrl_val, mask, bits = 0; /* - * Enable IRQ generation (0x8), + * Enable IRQ generation (0x8), if not PEBS, * and enable ring-3 counting (0x2) and ring-0 counting (0x1) * if requested: */ - bits = 0x8ULL; + if (!event->attr.precise_ip) + bits |= 0x8; if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) @@ -2120,14 +2122,14 @@ static void intel_pmu_enable_event(struct perf_event *event) if (unlikely(event_is_checkpointed(event))) cpuc->intel_cp_status |= (1ull << hwc->idx); + if (unlikely(event->attr.precise_ip)) + intel_pmu_pebs_enable(event); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc); + intel_pmu_enable_fixed(event); return; } - if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_enable(event); - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -- cgit v1.2.3 From ec71a398c1bf6d8188cb24ebab6f5202523d95e1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 8 Mar 2018 18:15:41 -0800 Subject: perf/x86/intel/ds: Handle PEBS overflow for fixed counters The pebs_drain() need to support fixed counters. The DS Save Area now include "counter reset value" fields for each fixed counters. Extend the related variables (e.g. mask, counters, error) to support fixed counters. There is no extended PEBS in PEBS v2 and earlier PEBS format. Only need to change the code for PEBS v3 and later PEBS format. Extend the pebs_event_reset[] logic to support new "counter reset value" fields. Increase the reserve space for fixed counters. Based-on-code-from: Andi Kleen Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20180309021542.11374-3-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 5 ++++- arch/x86/events/intel/ds.c | 36 +++++++++++++++++++++++++++--------- arch/x86/include/asm/intel_ds.h | 3 ++- 3 files changed, 33 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d5a3124605f5..b1a49a108a59 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2282,7 +2282,10 @@ again: * counters from the GLOBAL_STATUS mask and we always process PEBS * events via drain_pebs(). */ - status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + status &= ~cpuc->pebs_enabled; + else + status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); /* * PEBS overflow sets bit 62 in the global status register diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 9fd9cb1d2cc8..595b96ae8a00 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -903,10 +903,16 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) { struct debug_store *ds = cpuc->ds; u64 threshold; + int reserved; + + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed; + else + reserved = x86_pmu.max_pebs_events; if (cpuc->n_pebs == cpuc->n_large_pebs) { threshold = ds->pebs_absolute_maximum - - x86_pmu.max_pebs_events * x86_pmu.pebs_record_size; + reserved * x86_pmu.pebs_record_size; } else { threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size; } @@ -970,7 +976,11 @@ void intel_pmu_pebs_enable(struct perf_event *event) * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. */ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - ds->pebs_event_reset[hwc->idx] = + unsigned int idx = hwc->idx; + + if (idx >= INTEL_PMC_IDX_FIXED) + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + ds->pebs_event_reset[idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } else { ds->pebs_event_reset[hwc->idx] = 0; @@ -1488,9 +1498,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct debug_store *ds = cpuc->ds; struct perf_event *event; void *base, *at, *top; - short counts[MAX_PEBS_EVENTS] = {}; - short error[MAX_PEBS_EVENTS] = {}; - int bit, i; + short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + int bit, i, size; + u64 mask; if (!x86_pmu.pebs_active) return; @@ -1500,6 +1511,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; + mask = (1ULL << x86_pmu.max_pebs_events) - 1; + size = x86_pmu.max_pebs_events; + if (x86_pmu.flags & PMU_FL_PEBS_ALL) { + mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED; + size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + } + if (unlikely(base >= top)) { /* * The drain_pebs() could be called twice in a short period @@ -1509,7 +1527,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * update the event->count for this case. */ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, - x86_pmu.max_pebs_events) { + size) { event = cpuc->events[bit]; if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_save_and_restart_reload(event, 0); @@ -1522,12 +1540,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) u64 pebs_status; pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + pebs_status &= mask; /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { for_each_set_bit(bit, (unsigned long *)&pebs_status, - x86_pmu.max_pebs_events) + size) counts[bit]++; continue; @@ -1575,7 +1593,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) counts[bit]++; } - for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) { + for (bit = 0; bit < size; bit++) { if ((counts[bit] == 0) && (error[bit] == 0)) continue; diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 62a9f4966b42..ae26df1c2789 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -8,6 +8,7 @@ /* The maximal number of PEBS events: */ #define MAX_PEBS_EVENTS 8 +#define MAX_FIXED_PEBS_EVENTS 3 /* * A debug store configuration. @@ -23,7 +24,7 @@ struct debug_store { u64 pebs_index; u64 pebs_absolute_maximum; u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; + u64 pebs_event_reset[MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS]; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); -- cgit v1.2.3 From a38b0ba1b7d2e7a6d19877540240e8a4352fc93c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 8 Mar 2018 18:15:42 -0800 Subject: perf/x86/intel: Support Extended PEBS for Goldmont Plus Enable the extended PEBS for Goldmont Plus. There is no specific PEBS constrains for Goldmont Plus. Removing the pebs_constraints for Goldmont Plus. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20180309021542.11374-4-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b1a49a108a59..035c37481f57 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4077,7 +4077,6 @@ __init int intel_pmu_init(void) intel_pmu_lbr_init_skl(); x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints; x86_pmu.extra_regs = intel_glm_extra_regs; /* * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS @@ -4087,6 +4086,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; x86_pmu.cpu_events = glm_events_attrs; /* Goldmont Plus has 4-wide pipeline */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 595b96ae8a00..b7b01d762d32 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -713,12 +713,6 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_glp_pebs_event_constraints[] = { - /* Allow all events as PEBS with no flags */ - INTEL_ALL_EVENT_CONSTRAINT(0, 0xf), - EVENT_CONSTRAINT_END -}; - struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ -- cgit v1.2.3