lib/raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

This patch uses the vpermxor instruction to optimise the raid6 Q syndrome. This instruction was made available with POWER8, ISA version 2.07. It allows for both vperm and vxor instructions to be done in a single instruction. This has been tested for correctness on a ppc64le vm with a basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test directory. These results are from an IBM Firestone machine with ppc64le architecture. The benchmark results show a 35% speed increase over the best existing algorithm for powerpc (altivec). The raid6test has also been run on a big-endian ppc64 vm to ensure it also works for big-endian architectures.

Performance benchmarks:
    raid6: altivecx4 gen() 18773 MB/s
    raid6: altivecx8 gen() 19438 MB/s

    raid6: vpermxor4 gen() 25112 MB/s
    raid6: vpermxor8 gen() 26279 MB/s

Signed-off-by: Matt Brown <matthew.brown.dev@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
[mpe: Add VPERMXOR macro so we can build with old binutils]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
author: Matt Brown <matthew.brown.dev@gmail.com> 2017-08-04 13:42:32 +1000
committer: Michael Ellerman <mpe@ellerman.id.au> 2018-03-20 16:47:25 +1100
commit: 751ba79cc552c146595cd439b21c4ff8998c3b69 (patch)
tree: fc7aa71ed1ca788ab3a9c553021f7c876ccd4115 /lib/raid6/test/Makefile
parent: 7004263bd4f4c79da9ca2a1d04d38d4d6ed609ab (diff)
download: linux-751ba79cc552c146595cd439b21c4ff8998c3b69.tar.gz
linux-751ba79cc552c146595cd439b21c4ff8998c3b69.tar.bz2
linux-751ba79cc552c146595cd439b21c4ff8998c3b69.zip
1 files changed, 15 insertions, 2 deletions
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index be1010bdc435..ef6d0e00f189 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -48,7 +48,8 @@ else
                          gcc -c -x c - >&/dev/null && \
                          rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+                        vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 ifeq ($(ARCH),tilegx)
@@ -98,6 +99,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -123,7 +136,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 	rm -f tilegx*.c
 
 spotless: clean
author	Matt Brown <matthew.brown.dev@gmail.com>	2017-08-04 13:42:32 +1000
committer	Michael Ellerman <mpe@ellerman.id.au>	2018-03-20 16:47:25 +1100
commit	751ba79cc552c146595cd439b21c4ff8998c3b69 (patch)
tree	fc7aa71ed1ca788ab3a9c553021f7c876ccd4115 /lib/raid6/test/Makefile
parent	7004263bd4f4c79da9ca2a1d04d38d4d6ed609ab (diff)
download	linux-751ba79cc552c146595cd439b21c4ff8998c3b69.tar.gz linux-751ba79cc552c146595cd439b21c4ff8998c3b69.tar.bz2 linux-751ba79cc552c146595cd439b21c4ff8998c3b69.zip