From dd333449d0fb667c5250c42488a7e90470e16c77 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:18 -0800 Subject: crypto: chacha20-generic - add HChaCha20 library function Refactor the unkeyed permutation part of chacha20_block() into its own function, then add hchacha20_block() which is the ChaCha equivalent of HSalsa20 and is an intermediate step towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha20 skips the final addition of the initial state, and outputs only certain words of the state. It should not be used for streaming directly. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/chacha20.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/chacha20.c b/lib/chacha20.c index d907fec6a9ed..6a484e16171d 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -1,5 +1,5 @@ /* - * ChaCha20 256-bit cipher algorithm, RFC7539 + * The "hash function" used as the core of the ChaCha20 stream cipher (RFC7539) * * Copyright (C) 2015 Martin Willi * @@ -16,14 +16,10 @@ #include #include -void chacha20_block(u32 *state, u8 *stream) +static void chacha20_permute(u32 *x) { - u32 x[16]; int i; - for (i = 0; i < ARRAY_SIZE(x); i++) - x[i] = state[i]; - for (i = 0; i < 20; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); @@ -65,6 +61,25 @@ void chacha20_block(u32 *state, u8 *stream) x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } +} + +/** + * chacha20_block - generate one keystream block and increment block counter + * @state: input state matrix (16 32-bit words) + * @stream: output keystream block (64 bytes) + * + * This is the ChaCha20 core, a function from 64-byte strings to 64-byte + * strings. The caller has already converted the endianness of the input. This + * function also handles incrementing the block counter in the input matrix. + */ +void chacha20_block(u32 *state, u8 *stream) +{ + u32 x[16]; + int i; + + memcpy(x, state, 64); + + chacha20_permute(x); for (i = 0; i < ARRAY_SIZE(x); i++) put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); @@ -72,3 +87,26 @@ void chacha20_block(u32 *state, u8 *stream) state[12]++; } EXPORT_SYMBOL(chacha20_block); + +/** + * hchacha20_block - abbreviated ChaCha20 core, for XChaCha20 + * @in: input state matrix (16 32-bit words) + * @out: output (8 32-bit words) + * + * HChaCha20 is the ChaCha equivalent of HSalsa20 and is an intermediate step + * towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). + * HChaCha20 skips the final addition of the initial state, and outputs only + * certain words of the state. It should not be used for streaming directly. + */ +void hchacha20_block(const u32 *in, u32 *out) +{ + u32 x[16]; + + memcpy(x, in, 64); + + chacha20_permute(x); + + memcpy(&out[0], &x[0], 16); + memcpy(&out[4], &x[12], 16); +} +EXPORT_SYMBOL(hchacha20_block); -- cgit v1.2.3 From 1ca1b917940c24ca3d1f490118c5474168622953 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:21 -0800 Subject: crypto: chacha20-generic - refactor to allow varying number of rounds In preparation for adding XChaCha12 support, rename/refactor chacha20-generic to support different numbers of rounds. The justification for needing XChaCha12 support is explained in more detail in the patch "crypto: chacha - add XChaCha12 support". The only difference between ChaCha{8,12,20} are the number of rounds itself; all other parts of the algorithm are the same. Therefore, remove the "20" from all definitions, structures, functions, files, etc. that will be shared by all ChaCha versions. Also make ->setkey() store the round count in the chacha_ctx (previously chacha20_ctx). The generic code then passes the round count through to chacha_block(). There will be a ->setkey() function for each explicitly allowed round count; the encrypt/decrypt functions will be the same. I decided not to do it the opposite way (same ->setkey() function for all round counts, with different encrypt/decrypt functions) because that would have required more boilerplate code in architecture-specific implementations of ChaCha and XChaCha. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/Makefile | 2 +- lib/chacha.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/chacha20.c | 112 ------------------------------------------------------ 3 files changed, 118 insertions(+), 113 deletions(-) create mode 100644 lib/chacha.c delete mode 100644 lib/chacha20.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index db06d1237898..4c2b6fc5cde9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,7 +20,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ - sha1.o chacha20.o irq_regs.o argv_split.o \ + sha1.o chacha.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ diff --git a/lib/chacha.c b/lib/chacha.c new file mode 100644 index 000000000000..1bdc688c18df --- /dev/null +++ b/lib/chacha.c @@ -0,0 +1,117 @@ +/* + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +static void chacha_permute(u32 *x, int nrounds) +{ + int i; + + /* whitelist the allowed round counts */ + WARN_ON_ONCE(nrounds != 20); + + for (i = 0; i < nrounds; i += 2) { + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); + } +} + +/** + * chacha_block - generate one keystream block and increment block counter + * @state: input state matrix (16 32-bit words) + * @stream: output keystream block (64 bytes) + * @nrounds: number of rounds (currently must be 20) + * + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. + */ +void chacha_block(u32 *state, u8 *stream, int nrounds) +{ + u32 x[16]; + int i; + + memcpy(x, state, 64); + + chacha_permute(x, nrounds); + + for (i = 0; i < ARRAY_SIZE(x); i++) + put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); + + state[12]++; +} +EXPORT_SYMBOL(chacha_block); + +/** + * hchacha_block - abbreviated ChaCha core, for XChaCha + * @in: input state matrix (16 32-bit words) + * @out: output (8 32-bit words) + * @nrounds: number of rounds (currently must be 20) + * + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. + */ +void hchacha_block(const u32 *in, u32 *out, int nrounds) +{ + u32 x[16]; + + memcpy(x, in, 64); + + chacha_permute(x, nrounds); + + memcpy(&out[0], &x[0], 16); + memcpy(&out[4], &x[12], 16); +} +EXPORT_SYMBOL(hchacha_block); diff --git a/lib/chacha20.c b/lib/chacha20.c deleted file mode 100644 index 6a484e16171d..000000000000 --- a/lib/chacha20.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * The "hash function" used as the core of the ChaCha20 stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include - -static void chacha20_permute(u32 *x) -{ - int i; - - for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); - } -} - -/** - * chacha20_block - generate one keystream block and increment block counter - * @state: input state matrix (16 32-bit words) - * @stream: output keystream block (64 bytes) - * - * This is the ChaCha20 core, a function from 64-byte strings to 64-byte - * strings. The caller has already converted the endianness of the input. This - * function also handles incrementing the block counter in the input matrix. - */ -void chacha20_block(u32 *state, u8 *stream) -{ - u32 x[16]; - int i; - - memcpy(x, state, 64); - - chacha20_permute(x); - - for (i = 0; i < ARRAY_SIZE(x); i++) - put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); - - state[12]++; -} -EXPORT_SYMBOL(chacha20_block); - -/** - * hchacha20_block - abbreviated ChaCha20 core, for XChaCha20 - * @in: input state matrix (16 32-bit words) - * @out: output (8 32-bit words) - * - * HChaCha20 is the ChaCha equivalent of HSalsa20 and is an intermediate step - * towards XChaCha20 (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). - * HChaCha20 skips the final addition of the initial state, and outputs only - * certain words of the state. It should not be used for streaming directly. - */ -void hchacha20_block(const u32 *in, u32 *out) -{ - u32 x[16]; - - memcpy(x, in, 64); - - chacha20_permute(x); - - memcpy(&out[0], &x[0], 16); - memcpy(&out[4], &x[12], 16); -} -EXPORT_SYMBOL(hchacha20_block); -- cgit v1.2.3 From aa7624093cb7fbf4fea95e612580d8d29a819f67 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Nov 2018 17:26:22 -0800 Subject: crypto: chacha - add XChaCha12 support Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel Acked-by: Martin Willi Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/chacha.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/chacha.c b/lib/chacha.c index 1bdc688c18df..a46d2832dbab 100644 --- a/lib/chacha.c +++ b/lib/chacha.c @@ -21,7 +21,7 @@ static void chacha_permute(u32 *x, int nrounds) int i; /* whitelist the allowed round counts */ - WARN_ON_ONCE(nrounds != 20); + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); for (i = 0; i < nrounds; i += 2) { x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); @@ -70,7 +70,7 @@ static void chacha_permute(u32 *x, int nrounds) * chacha_block - generate one keystream block and increment block counter * @state: input state matrix (16 32-bit words) * @stream: output keystream block (64 bytes) - * @nrounds: number of rounds (currently must be 20) + * @nrounds: number of rounds (20 or 12; 20 is recommended) * * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. * The caller has already converted the endianness of the input. This function @@ -96,7 +96,7 @@ EXPORT_SYMBOL(chacha_block); * hchacha_block - abbreviated ChaCha core, for XChaCha * @in: input state matrix (16 32-bit words) * @out: output (8 32-bit words) - * @nrounds: number of rounds (currently must be 20) + * @nrounds: number of rounds (20 or 12; 20 is recommended) * * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha -- cgit v1.2.3