diff options
author | Heiko Carstens <heiko.carstens@de.ibm.com> | 2016-12-07 13:45:38 +0100 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-12-12 12:11:32 +0100 |
commit | b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3 (patch) | |
tree | 23f0a9923f2f77cafc9ac097b1c9dd350ec0a55b /arch/s390/lib | |
parent | 82897ede9235d31c50074ce1da81828aa2f3d70c (diff) | |
download | linux-b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3.tar.gz linux-b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3.tar.bz2 linux-b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3.zip |
s390: provide memmove implementation
Provide an s390-specific memmove implementation which is faster than
the generic implementation, which copies byte-wise.
For non-destructive (as defined by the mvc instruction) memmove
operations the following table compares the old default implementation
versus the new s390-specific implementation:
size old new
1 1ns 8ns
2 2ns 8ns
4 4ns 8ns
8 7ns 8ns
16 17ns 8ns
32 35ns 8ns
64 65ns 9ns
128 146ns 10ns
256 298ns 11ns
512 537ns 11ns
1024 1193ns 19ns
2048 2405ns 36ns
So the old implementation is faster only for very small sizes (up to
8 bytes). For destructively overlapping memmoves (dest above src but
below src + n), where the mvc instruction can't be used, the new
implementation copies byte-wise and is as slow as the old one.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/lib')
-rw-r--r-- | arch/s390/lib/mem.S | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index be9fa65bfac4..7422a706f310 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -8,6 +8,45 @@
 #include <asm/export.h>
 
 /*
+ * void *memmove(void *dest, const void *src, size_t n)
+ */
+ENTRY(memmove)
+	ltgr	%r4,%r4
+	lgr	%r1,%r2
+	bzr	%r14
+	clgr	%r2,%r3
+	jnh	.Lmemmove_forward
+	la	%r5,0(%r4,%r3)
+	clgr	%r2,%r5
+	jl	.Lmemmove_reverse
+.Lmemmove_forward:
+	aghi	%r4,-1
+	srlg	%r0,%r4,8
+	ltgr	%r0,%r0
+	jz	.Lmemmove_rest
+.Lmemmove_loop:
+	mvc	0(256,%r1),0(%r3)
+	la	%r1,256(%r1)
+	la	%r3,256(%r3)
+	brctg	%r0,.Lmemmove_loop
+.Lmemmove_rest:
+	larl	%r5,.Lmemmove_mvc
+	ex	%r4,0(%r5)
+	br	%r14
+.Lmemmove_reverse:
+	aghi	%r4,-1
+.Lmemmove_reverse_loop:
+	ic	%r0,0(%r4,%r3)
+	stc	%r0,0(%r4,%r1)
+	brctg	%r4,.Lmemmove_reverse_loop
+	ic	%r0,0(%r4,%r3)
+	stc	%r0,0(%r4,%r1)
+	br	%r14
+.Lmemmove_mvc:
+	mvc	0(1,%r1),0(%r3)
+EXPORT_SYMBOL(memmove)
+
+/*
  * memset implementation
  *
  * This code corresponds to the C construct below. We do distinguish |