summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-24 09:28:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-24 09:28:01 -0700
commit474095e46cd14421821da3201a9fd6a4c070996b (patch)
tree7203d36f53c376a96099ed0310787b1fb0c4f7a5 /lib
parentd56a669ca59c37ed0a7282a251b2f2f22533343a (diff)
parent9ffc8f7cb9647b13dfe4d1ad0d5e1427bb8b46d6 (diff)
downloadlinux-474095e46cd14421821da3201a9fd6a4c070996b.tar.gz
linux-474095e46cd14421821da3201a9fd6a4c070996b.tar.bz2
linux-474095e46cd14421821da3201a9fd6a4c070996b.zip
Merge tag 'md/4.1' of git://neil.brown.name/md
Pull md updates from Neil Brown: "More updates that usual this time. A few have performance impacts which hould mostly be positive, but RAID5 (in particular) can be very work-load ensitive... We'll have to wait and see. Highlights: - "experimental" code for managing md/raid1 across a cluster using DLM. Code is not ready for general use and triggers a WARNING if used. However it is looking good and mostly done and having in mainline will help co-ordinate development. - RAID5/6 can now batch multiple (4K wide) stripe_heads so as to handle a full (chunk wide) stripe as a single unit. - RAID6 can now perform read-modify-write cycles which should help performance on larger arrays: 6 or more devices. - RAID5/6 stripe cache now grows and shrinks dynamically. The value set is used as a minimum. - Resync is now allowed to go a little faster than the 'mininum' when there is competing IO. How much faster depends on the speed of the devices, so the effective minimum should scale with device speed to some extent" * tag 'md/4.1' of git://neil.brown.name/md: (58 commits) md/raid5: don't do chunk aligned read on degraded array. md/raid5: allow the stripe_cache to grow and shrink. md/raid5: change ->inactive_blocked to a bit-flag. md/raid5: move max_nr_stripes management into grow_one_stripe and drop_one_stripe md/raid5: pass gfp_t arg to grow_one_stripe() md/raid5: introduce configuration option rmw_level md/raid5: activate raid6 rmw feature md/raid6 algorithms: xor_syndrome() for SSE2 md/raid6 algorithms: xor_syndrome() for generic int md/raid6 algorithms: improve test program md/raid6 algorithms: delta syndrome functions raid5: handle expansion/resync case with stripe batching raid5: handle io error of batch list RAID5: batch adjacent full stripe write raid5: track overwrite disk count raid5: add a new flag to track if a stripe can be batched raid5: use flex_array for scribble data md raid0: access mddev->queue (request queue member) conditionally because it is not set when accessed from dm-raid md: allow resync to go faster when there is competing IO. md: remove 'go_faster' option from ->sync_request() ...
Diffstat (limited to 'lib')
-rw-r--r--lib/raid6/algos.c41
-rw-r--r--lib/raid6/altivec.uc1
-rw-r--r--lib/raid6/avx2.c3
-rw-r--r--lib/raid6/int.uc41
-rw-r--r--lib/raid6/mmx.c2
-rw-r--r--lib/raid6/neon.c1
-rw-r--r--lib/raid6/sse1.c2
-rw-r--r--lib/raid6/sse2.c227
-rw-r--r--lib/raid6/test/test.c51
-rw-r--r--lib/raid6/tilegx.uc1
10 files changed, 347 insertions, 23 deletions
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
static inline const struct raid6_calls *raid6_choose_gen(
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
{
- unsigned long perf, bestperf, j0, j1;
+ unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+ int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
const struct raid6_calls *const *algo;
const struct raid6_calls *best;
- for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+ for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
if (!best || (*algo)->prefer >= best->prefer) {
if ((*algo)->valid && !(*algo)->valid())
continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
}
preempt_enable();
- if (perf > bestperf) {
- bestperf = perf;
+ if (perf > bestgenperf) {
+ bestgenperf = perf;
best = *algo;
}
- pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+ pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+ if (!(*algo)->xor_syndrome)
+ continue;
+
+ perf = 0;
+
+ preempt_disable();
+ j0 = jiffies;
+ while ((j1 = jiffies) == j0)
+ cpu_relax();
+ while (time_before(jiffies,
+ j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+ (*algo)->xor_syndrome(disks, start, stop,
+ PAGE_SIZE, *dptrs);
+ perf++;
+ }
+ preempt_enable();
+
+ if (best == *algo)
+ bestxorperf = perf;
+
+ pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+ (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
}
}
if (best) {
- pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+ pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
best->name,
- (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ if (best->xor_syndrome)
+ pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+ (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
raid6_call = *best;
} else
pr_err("raid6: Yikes! No algorithm found!\n");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
const struct raid6_calls raid6_altivec$# = {
raid6_altivec$#_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_altivec,
"altivecx$#",
0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x1 = {
raid6_avx21_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x1",
1 /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x2 = {
raid6_avx22_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x2",
1 /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_avx2x4 = {
raid6_avx24_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_avx2,
"avx2x4",
1 /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
}
}
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+ /* P/Q data pages */
+ wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ wp$$ ^= wd$$;
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ &= NBYTES(0x1d);
+ w1$$ ^= w2$$;
+ wq$$ = w1$$ ^ wd$$;
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ &= NBYTES(0x1d);
+ wq$$ = w1$$ ^ w2$$;
+ }
+ *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ }
+
+}
+
const struct raid6_calls raid6_intx$# = {
raid6_int$#_gen_syndrome,
- NULL, /* always valid */
+ raid6_int$#_xor_syndrome,
+ NULL, /* always valid */
"int" NSTRING "x$#",
0
};
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx1 = {
raid6_mmx1_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx1",
0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_mmxx2 = {
raid6_mmx2_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_mmx,
"mmxx2",
0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
+ NULL, /* XOR not yet implemented */ \
raid6_have_neon, \
"neonx" #_n, \
0 \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x1 = {
raid6_sse11_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x1",
1 /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_sse1x2 = {
raid6_sse12_gen_syndrome,
+ NULL, /* XOR not yet implemented */
raid6_have_sse1_or_mmxext,
"sse1x2",
1 /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 16 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("pxor %xmm4,%xmm2");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm5,%xmm4");
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ }
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ /* Don't use movntdq for r/w memory area < cache line */
+ asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
const struct raid6_calls raid6_sse2x1 = {
raid6_sse21_gen_syndrome,
+ raid6_sse21_xor_syndrome,
raid6_have_sse2,
"sse2x1",
1 /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+ static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 32 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+ asm volatile("pxor %xmm4,%xmm2");
+ asm volatile("pxor %xmm6,%xmm3");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ }
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ }
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+ /* Don't use movntdq for r/w memory area < cache line */
+ asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+ asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+ }
+
const struct raid6_calls raid6_sse2x2 = {
raid6_sse22_gen_syndrome,
+ raid6_sse22_xor_syndrome,
raid6_have_sse2,
"sse2x2",
1 /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
kernel_fpu_end();
}
+ static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+ {
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+ for ( d = 0 ; d < bytes ; d += 64 ) {
+ asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+ asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+ asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+ asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+ asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+ asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+ asm volatile("pxor %xmm4,%xmm2");
+ asm volatile("pxor %xmm6,%xmm3");
+ asm volatile("pxor %xmm12,%xmm10");
+ asm volatile("pxor %xmm14,%xmm11");
+ /* P/Q data pages */
+ for ( z = z0-1 ; z >= start ; z-- ) {
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pxor %xmm13,%xmm13");
+ asm volatile("pxor %xmm15,%xmm15");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("pcmpgtb %xmm12,%xmm13");
+ asm volatile("pcmpgtb %xmm14,%xmm15");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("paddb %xmm12,%xmm12");
+ asm volatile("paddb %xmm14,%xmm14");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pand %xmm0,%xmm13");
+ asm volatile("pand %xmm0,%xmm15");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+ asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+ asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm13,%xmm10");
+ asm volatile("pxor %xmm15,%xmm11");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ }
+ asm volatile("prefetchnta %0" :: "m" (q[d]));
+ asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+ /* P/Q left side optimization */
+ for ( z = start-1 ; z >= 0 ; z-- ) {
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pxor %xmm13,%xmm13");
+ asm volatile("pxor %xmm15,%xmm15");
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("pcmpgtb %xmm12,%xmm13");
+ asm volatile("pcmpgtb %xmm14,%xmm15");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("paddb %xmm12,%xmm12");
+ asm volatile("paddb %xmm14,%xmm14");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pand %xmm0,%xmm13");
+ asm volatile("pand %xmm0,%xmm15");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ }
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+ asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+ asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+ asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+ asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+ asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+ asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+ asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+ asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+ asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+ }
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+ }
+
+
const struct raid6_calls raid6_sse2x4 = {
raid6_sse24_gen_syndrome,
+ raid6_sse24_xor_syndrome,
raid6_have_sse2,
"sse2x4",
1 /* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
char data[NDISKS][PAGE_SIZE];
char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
-static void makedata(void)
+static void makedata(int start, int stop)
{
int i, j;
- for (i = 0; i < NDISKS; i++) {
+ for (i = start; i <= stop; i++) {
for (j = 0; j < PAGE_SIZE; j++)
data[i][j] = rand();
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
{
const struct raid6_calls *const *algo;
const struct raid6_recov_calls *const *ra;
- int i, j;
+ int i, j, p1, p2;
int err = 0;
- makedata();
+ makedata(0, NDISKS-1);
for (ra = raid6_recov_algos; *ra; ra++) {
if ((*ra)->valid && !(*ra)->valid())
continue;
+
raid6_2data_recov = (*ra)->data2;
raid6_datap_recov = (*ra)->datap;
printf("using recovery %s\n", (*ra)->name);
for (algo = raid6_algos; *algo; algo++) {
- if (!(*algo)->valid || (*algo)->valid()) {
- raid6_call = **algo;
+ if ((*algo)->valid && !(*algo)->valid())
+ continue;
+
+ raid6_call = **algo;
+
+ /* Nuke syndromes */
+ memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+ /* Generate assumed good syndrome */
+ raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+ (void **)&dataptrs);
+
+ for (i = 0; i < NDISKS-1; i++)
+ for (j = i+1; j < NDISKS; j++)
+ err += test_disks(i, j);
+
+ if (!raid6_call.xor_syndrome)
+ continue;
+
+ for (p1 = 0; p1 < NDISKS-2; p1++)
+ for (p2 = p1; p2 < NDISKS-2; p2++) {
- /* Nuke syndromes */
- memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+ /* Simulate rmw run */
+ raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+ (void **)&dataptrs);
+ makedata(p1, p2);
+ raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+ (void **)&dataptrs);
- /* Generate assumed good syndrome */
- raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
- (void **)&dataptrs);
+ for (i = 0; i < NDISKS-1; i++)
+ for (j = i+1; j < NDISKS; j++)
+ err += test_disks(i, j);
+ }
- for (i = 0; i < NDISKS-1; i++)
- for (j = i+1; j < NDISKS; j++)
- err += test_disks(i, j);
- }
}
printf("\n");
}
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
const struct raid6_calls raid6_tilegx$# = {
raid6_tilegx$#_gen_syndrome,
+ NULL, /* XOR not yet implemented */
NULL,
"tilegx$#",
0