summary | refs | log | tree | commit | diff | stats
path: root/crypto
diff options
context:
space:
mode:
author: Ondrej Mosnáček <omosnacek@gmail.com> 2017-04-02 21:19:13 +0200
committer: Herbert Xu <herbert@gondor.apana.org.au> 2017-04-05 21:58:35 +0800
commit: acb9b159c784dc0033ede0dadde876ebd93aca4c (patch)
tree: 4bc26c590b0fdcb517e996197958aa2654496f06 /crypto
parent: f275d3856cf597419293cd7d95aa628d3073f556 (diff)
download: linux-acb9b159c784dc0033ede0dadde876ebd93aca4c.tar.gz
linux-acb9b159c784dc0033ede0dadde876ebd93aca4c.tar.bz2
linux-acb9b159c784dc0033ede0dadde876ebd93aca4c.zip
crypto: gf128mul - define gf128mul_x_* in gf128mul.h
The gf128mul_x_ble function is currently defined in gf128mul.c, because it depends on the gf128mul_table_be multiplication table. However, since the function is very small and only uses two values from the table, it is better for it to be defined as an inline function in gf128mul.h. That way, the function can be inlined by the compiler for better performance. For consistency, the other gf128mul_x_* functions are also moved to the header file. In addition, the code is rewritten to be constant-time. After this change, the speed of the generic 'xts(aes)' implementation increased from ~225 MiB/s to ~235 MiB/s (measured using 'cryptsetup benchmark -c aes-xts-plain64' on an Intel system with CRYPTO_AES_X86_64 and CRYPTO_AES_NI_INTEL disabled). Signed-off-by: Ondrej Mosnacek <omosnacek@gmail.com> Reviewed-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/gf128mul.c 33
1 files changed, 1 insertions, 32 deletions
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 04facc0690aa..dc012129c063 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -130,43 +130,12 @@ static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
/*
- * The following functions multiply a field element by x or by x^8 in
+ * The following functions multiply a field element by x^8 in
* the polynomial field representation. They use 64-bit word operations
* to gain speed but compensate for machine endianness and hence work
* correctly on both styles of machine.
*/
-static void gf128mul_x_lle(be128 *r, const be128 *x)
-{
- u64 a = be64_to_cpu(x->a);
- u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_le[(b << 7) & 0xff];
-
- r->b = cpu_to_be64((b >> 1) | (a << 63));
- r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
-}
-
-static void gf128mul_x_bbe(be128 *r, const be128 *x)
-{
- u64 a = be64_to_cpu(x->a);
- u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_be[a >> 63];
-
- r->a = cpu_to_be64((a << 1) | (b >> 63));
- r->b = cpu_to_be64((b << 1) ^ _tt);
-}
-
-void gf128mul_x_ble(be128 *r, const be128 *x)
-{
- u64 a = le64_to_cpu(x->a);
- u64 b = le64_to_cpu(x->b);
- u64 _tt = gf128mul_table_be[b >> 63];
-
- r->a = cpu_to_le64((a << 1) ^ _tt);
- r->b = cpu_to_le64((b << 1) | (a >> 63));
-}
-EXPORT_SYMBOL(gf128mul_x_ble);
-
static void gf128mul_x8_lle(be128 *x)
{
u64 a = be64_to_cpu(x->a);