summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
authorJoonwoo Park <joonwoop@codeaurora.org>2014-10-21 01:59:03 +0100
committerWill Deacon <will.deacon@arm.com>2014-11-06 17:25:27 +0000
commit70ddb63a88bfd45eb6abe36e2bf4f8f351a447d7 (patch)
tree4dcae5cd2781e3265f0a5a9effc4060943ae04c5 /arch/arm64/kernel
parent4ee20980812b5b1800221996ec438689f61e98b4 (diff)
downloadlinux-70ddb63a88bfd45eb6abe36e2bf4f8f351a447d7.tar.gz
linux-70ddb63a88bfd45eb6abe36e2bf4f8f351a447d7.tar.bz2
linux-70ddb63a88bfd45eb6abe36e2bf4f8f351a447d7.zip
arm64: optimize memcpy_{from,to}io() and memset_io()
Optimize memcpy_{from,to}io() and memset_io() by transferring in 64 bit as much as possible with minimized barrier usage. This simplest optimization brings faster throughput compare to current byte-by-byte read and write with barrier in the loop. Code's skeleton is taken from the powerpc. Link: http://lkml.kernel.org/p/20141020133304.GH23751@e104818-lin.cambridge.arm.com Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Reviewed-by: Trilok Soni <tsoni@codeaurora.org> Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org> Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/io.c66
1 files changed, 56 insertions, 10 deletions
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 7d37ead4d199..354be2a872ae 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -25,12 +25,26 @@
*/
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
- unsigned char *t = to;
- while (count) {
+ while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
+ !IS_ALIGNED((unsigned long)to, 8))) {
+ *(u8 *)to = __raw_readb(from);
+ from++;
+ to++;
count--;
- *t = readb(from);
- t++;
+ }
+
+ while (count >= 8) {
+ *(u64 *)to = __raw_readq(from);
+ from += 8;
+ to += 8;
+ count -= 8;
+ }
+
+ while (count) {
+ *(u8 *)to = __raw_readb(from);
from++;
+ to++;
+ count--;
}
}
EXPORT_SYMBOL(__memcpy_fromio);
@@ -40,12 +54,26 @@ EXPORT_SYMBOL(__memcpy_fromio);
*/
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
- const unsigned char *f = from;
- while (count) {
+ while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
+ !IS_ALIGNED((unsigned long)from, 8))) {
+ __raw_writeb(*(volatile u8 *)from, to);
+ from++;
+ to++;
count--;
- writeb(*f, to);
- f++;
+ }
+
+ while (count >= 8) {
+ __raw_writeq(*(volatile u64 *)from, to);
+ from += 8;
+ to += 8;
+ count -= 8;
+ }
+
+ while (count) {
+ __raw_writeb(*(volatile u8 *)from, to);
+ from++;
to++;
+ count--;
}
}
EXPORT_SYMBOL(__memcpy_toio);
@@ -55,10 +83,28 @@ EXPORT_SYMBOL(__memcpy_toio);
*/
void __memset_io(volatile void __iomem *dst, int c, size_t count)
{
- while (count) {
+ u64 qc = (u8)c;
+
+ qc |= qc << 8;
+ qc |= qc << 16;
+ qc |= qc << 32;
+
+ while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+ __raw_writeb(c, dst);
+ dst++;
count--;
- writeb(c, dst);
+ }
+
+ while (count >= 8) {
+ __raw_writeq(qc, dst);
+ dst += 8;
+ count -= 8;
+ }
+
+ while (count) {
+ __raw_writeb(c, dst);
dst++;
+ count--;
}
}
EXPORT_SYMBOL(__memset_io);