summary | refs | log | tree | commit | diff | stats
path: root/arch/s390/kernel/mcount64.S
diff options
context:
space:
mode:
author: Heiko Carstens <heiko.carstens@de.ibm.com> 2014-09-03 13:26:23 +0200
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2014-09-09 08:53:30 +0200
commit: 3d1e220d08c6a00ffa83d39030b8162f66665b2b (patch)
tree: 4529f0d568ef53d296476a640d26ae0128bcbacf /arch/s390/kernel/mcount64.S
parent: ea2f47699082b971769be8b8f38c08b49219f471 (diff)
download: linux-3d1e220d08c6a00ffa83d39030b8162f66665b2b.tar.gz
download: linux-3d1e220d08c6a00ffa83d39030b8162f66665b2b.tar.bz2
download: linux-3d1e220d08c6a00ffa83d39030b8162f66665b2b.zip
s390/ftrace: optimize mcount code
Reduce the number of executed instructions within the mcount block if function tracing is enabled. We achieve that by using a non-standard C function call ABI. Since the called function is also written in assembler this is not a problem. This also allows replacing the unconditional store at the beginning of the mcount block with a larl instruction, which doesn't touch memory. In theory we could also patch the first instruction of the mcount block to enable and disable function tracing. However this would break kprobes. This could be fixed by implementing the "kprobes_on_ftrace" feature; however, keeping the odd jprobes working seems not to be possible without a lot of code churn. Therefore keep the code easy and simply accept one wasted 1-cycle "larl" instruction per function prologue. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/kernel/mcount64.S')
-rw-r--r--arch/s390/kernel/mcount64.S30
1 file changed, 14 insertions, 16 deletions
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
index 8cf976f83a10..07abe8d464d4 100644
--- a/arch/s390/kernel/mcount64.S
+++ b/arch/s390/kernel/mcount64.S
@@ -16,7 +16,6 @@ ENTRY(ftrace_stub)
br %r14
#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
@@ -31,40 +30,39 @@ ENTRY(ftrace_caller)
aghi %r15,-STACK_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
- stmg %r0,%r13,STACK_PTREGS_GPRS(%r15)
- stg %r14,(STACK_PTREGS_PSW+8)(%r15)
+ stg %r0,(STACK_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- aghik %r2,%r14,-MCOUNT_INSN_SIZE
+ aghik %r2,%r0,-MCOUNT_INSN_SIZE
lgrl %r4,function_trace_op
- lgrl %r14,ftrace_trace_function
+ lgrl %r1,ftrace_trace_function
#else
- lgr %r2,%r14
+ lgr %r2,%r0
aghi %r2,-MCOUNT_INSN_SIZE
larl %r4,function_trace_op
lg %r4,0(%r4)
- larl %r14,ftrace_trace_function
- lg %r14,0(%r14)
+ larl %r1,ftrace_trace_function
+ lg %r1,0(%r1)
#endif
- lg %r3,STACK_PARENT_IP(%r15)
+ lgr %r3,%r14
la %r5,STACK_PTREGS(%r15)
- basr %r14,%r14
+ basr %r14,%r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# The j instruction gets runtime patched to a nop instruction.
# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
# j .+4
ENTRY(ftrace_graph_caller)
j ftrace_graph_caller_end
- lg %r2,STACK_PARENT_IP(%r15)
+ lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
lg %r3,(STACK_PTREGS_PSW+8)(%r15)
brasl %r14,prepare_ftrace_return
- stg %r2,STACK_PARENT_IP(%r15)
+ stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
ftrace_graph_caller_end:
.globl ftrace_graph_caller_end
#endif
- lmg %r0,%r13,STACK_PTREGS_GPRS(%r15)
- lg %r14,(STACK_PTREGS_PSW+8)(%r15)
- aghi %r15,STACK_FRAME_SIZE
- br %r14
+ lg %r1,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+ br %r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER