diff options
author | Robin Getz <robin.getz@analog.com> | 2010-05-04 14:59:21 +0000 |
---|---|---|
committer | Mike Frysinger <vapier@gentoo.org> | 2010-05-22 14:19:11 -0400 |
commit | 648eee52ccdc623e21b920d6048e93490a4860a7 (patch) | |
tree | 1d106f156abb6c7a0169a392f4856cd63893e079 /arch/blackfin | |
parent | c70dcabc8eba18113a4735e7b1bd09f7493e38f0 (diff) | |
download | linux-648eee52ccdc623e21b920d6048e93490a4860a7.tar.gz linux-648eee52ccdc623e21b920d6048e93490a4860a7.tar.bz2 linux-648eee52ccdc623e21b920d6048e93490a4860a7.zip |
Blackfin: optimize strncpy a bit
Add a little strncpy optimization which can easily cut boot time by 20%.
When the kernel is booting with initramfs, it builds up the filesystem
from a cpio archive by calling strncpy_from_user() via fs/namei.c's
do_getname() on every file in the archive (which can be lots) with a
length of PATH_MAX (1024). This causes the dest of the strncpy to be
padded with many NUL bytes.
This optimization mostly causes this NUL padding to be written with a call
to memset(), which is already optimized for filling memory quickly; the
hardware loop helps a little bit as well.
Boot time measured with 'loglevel=0' so UART speed doesn't get in the way.
Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'arch/blackfin')
-rw-r--r-- | arch/blackfin/lib/memset.S | 1 | ||||
-rw-r--r-- | arch/blackfin/lib/strncpy.S | 59 |
2 files changed, 47 insertions, 13 deletions
diff --git a/arch/blackfin/lib/memset.S b/arch/blackfin/lib/memset.S index c30d99b10969..eab1bef3f5bf 100644 --- a/arch/blackfin/lib/memset.S +++ b/arch/blackfin/lib/memset.S @@ -20,6 +20,7 @@ * R1 = filler byte * R2 = count * Favours word aligned data. + * The strncpy assumes that I0 and I1 are not used in this function */ ENTRY(_memset) diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index 39fbbe6523e5..f3931d50b4a7 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S @@ -5,12 +5,14 @@ */ #include <linux/linkage.h> +#include <asm/context.S> /* void *strncpy(char *dest, const char *src, size_t n); * R0 = address (dest) * R1 = address (src) * R2 = size - * Returns a pointer to the destination string dest + * Returns a pointer (R0) to the destination string dest + * we do this by not changing R0 */ #ifdef CONFIG_STRNCPY_L1 @@ -24,29 +26,60 @@ ENTRY(_strncpy) CC = R2 == 0; if CC JUMP 4f; + + P2 = R2 ; /* size */ P0 = R0 ; /* dst*/ P1 = R1 ; /* src*/ + LSETUP (1f, 2f) LC0 = P2; 1: R1 = B [P1++] (Z); B [P0++] = R1; - CC = R1; - if ! cc jump 2f; - R2 += -1; - CC = R2 == 0; - if ! cc jump 1b (bp); - jump 4f; + CC = R1 == 0; 2: - /* if src is shorter than n, we need to null pad bytes in dest */ - R1 = 0; + if CC jump 3f; + + RTS; + + /* if src is shorter than n, we need to null pad bytes in dest + * but, we can get here when the last byte is zero, and we don't + * want to copy an extra byte at the end, so we need to check + */ 3: + R2 = LC0; + CC = R2 + if ! CC jump 6f; + + /* if the required null padded portion is small, do it here, rather than + * handling the overhead of memset (which is OK when things are big). 
+ */ + R3 = 0x20; + CC = R2 < R3; + IF CC jump 4f; + R2 += -1; - CC = R2 == 0; - if cc jump 4f; - B [P0++] = R1; - jump 3b; + + /* Set things up for memset + * R0 = address + * R1 = filler byte (this case it's zero, set above) + * R2 = count (set above) + */ + + I1 = R0; + R0 = RETS; + I0 = R0; + R0 = P0; + pseudo_long_call _memset, p0; + R0 = I0; + RETS = R0; + R0 = I1; + RTS; 4: + LSETUP(5f, 5f) LC0; +5: + B [P0++] = R1; +6: RTS; ENDPROC(_strncpy) |