summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:25:58 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 10:25:58 -0800
commit27d189c02ba25851973c8582e419c0bded9f7e5b (patch)
treebe142d664bc4e3cec7ab2878a243343f46e897ee
parenta1703154200c390ab03c10224c586e815d3e31e8 (diff)
parent55db8387a5e8d07407f0b7c6b2526417a2bc6243 (diff)
downloadlinux-27d189c02ba25851973c8582e419c0bded9f7e5b.tar.gz
linux-27d189c02ba25851973c8582e419c0bded9f7e5b.tar.bz2
linux-27d189c02ba25851973c8582e419c0bded9f7e5b.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits) hwrng: via_rng - Fix memory scribbling on some CPUs crypto: padlock - Move padlock.h into include/crypto hwrng: via_rng - Fix asm constraints crypto: n2 - use __devexit not __exit in n2_unregister_algs crypto: mark crypto workqueues CPU_INTENSIVE crypto: mv_cesa - dont return PTR_ERR() of wrong pointer crypto: ripemd - Set module author and update email address crypto: omap-sham - backlog handling fix crypto: gf128mul - Remove experimental tag crypto: af_alg - fix af_alg memory_allocated data type crypto: aesni-intel - Fixed build with binutils 2.16 crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets net: Add missing lockdep class names for af_alg include: Install linux/if_alg.h for user-space crypto API crypto: omap-aes - checkpatch --file warning fixes crypto: omap-aes - initialize aes module once per request crypto: omap-aes - unnecessary code removed crypto: omap-aes - error handling implementation improved crypto: omap-aes - redundant locking is removed crypto: omap-aes - DMA initialization fixes for OMAP off mode ...
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S1832
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c540
-rw-r--r--crypto/Kconfig34
-rw-r--r--crypto/Makefile17
-rw-r--r--crypto/af_alg.c483
-rw-r--r--crypto/algif_hash.c319
-rw-r--r--crypto/algif_skcipher.c632
-rw-r--r--crypto/authenc.c22
-rw-r--r--crypto/cast5.c74
-rw-r--r--crypto/crypto_wq.c3
-rw-r--r--crypto/deflate.c3
-rw-r--r--crypto/eseqiv.c18
-rw-r--r--crypto/gcm.c19
-rw-r--r--crypto/pcrypt.c3
-rw-r--r--crypto/rmd128.c3
-rw-r--r--crypto/rmd160.c3
-rw-r--r--crypto/rmd256.c3
-rw-r--r--crypto/rmd320.c3
-rw-r--r--crypto/shash.c8
-rw-r--r--crypto/tcrypt.c11
-rw-r--r--crypto/testmgr.c24
-rw-r--r--crypto/testmgr.h361
-rw-r--r--crypto/zlib.c3
-rw-r--r--drivers/char/hw_random/via-rng.c10
-rw-r--r--drivers/crypto/mv_cesa.c2
-rw-r--r--drivers/crypto/n2_core.c2
-rw-r--r--drivers/crypto/omap-aes.c260
-rw-r--r--drivers/crypto/omap-sham.c374
-rw-r--r--drivers/crypto/padlock-aes.c2
-rw-r--r--drivers/crypto/padlock-sha.c8
-rw-r--r--include/crypto/if_alg.h92
-rw-r--r--include/crypto/padlock.h (renamed from drivers/crypto/padlock.h)8
-rw-r--r--include/crypto/scatterwalk.h15
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/if_alg.h40
-rw-r--r--include/linux/socket.h5
-rw-r--r--net/core/sock.c6
37 files changed, 4779 insertions, 464 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index ff16756a51c1..8fe2a4966b7a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -9,6 +9,20 @@
* Vinodh Gopal <vinodh.gopal@intel.com>
* Kahraman Akdemir
*
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
+ * Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * James Guilford (james.guilford@intel.com)
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Wajdi Feghali (wajdi.k.feghali@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * Ported x86_64 version to x86:
+ * Author: Mathias Krause <minipli@googlemail.com>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -18,8 +32,62 @@
#include <linux/linkage.h>
#include <asm/inst.h>
+#ifdef __x86_64__
+.data
+POLY: .octa 0xC2000000000000000000000000000001
+TWOONE: .octa 0x00000001000000000000000000000001
+
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK,
+# and ZERO should follow ALL_F
+
+SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
+MASK1: .octa 0x0000000000000000ffffffffffffffff
+MASK2: .octa 0xffffffffffffffff0000000000000000
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F: .octa 0xffffffffffffffffffffffffffffffff
+ZERO: .octa 0x00000000000000000000000000000000
+ONE: .octa 0x00000000000000000000000000000001
+F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
+dec: .octa 0x1
+enc: .octa 0x2
+
+
.text
+
+#define STACK_OFFSET 8*3
+#define HashKey 16*0 // store HashKey <<1 mod poly here
+#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here
+#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here
+#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here
+#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64
+ // bits of HashKey <<1 mod poly here
+ //(for Karatsuba purposes)
+#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64
+ // bits of HashKey^2 <<1 mod poly here
+ // (for Karatsuba purposes)
+#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64
+ // bits of HashKey^3 <<1 mod poly here
+ // (for Karatsuba purposes)
+#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64
+ // bits of HashKey^4 <<1 mod poly here
+ // (for Karatsuba purposes)
+#define VARIABLE_OFFSET 16*8
+
+#define arg1 rdi
+#define arg2 rsi
+#define arg3 rdx
+#define arg4 rcx
+#define arg5 r8
+#define arg6 r9
+#define arg7 STACK_OFFSET+8(%r14)
+#define arg8 STACK_OFFSET+16(%r14)
+#define arg9 STACK_OFFSET+24(%r14)
+#define arg10 STACK_OFFSET+32(%r14)
+#endif
+
+
#define STATE1 %xmm0
#define STATE2 %xmm4
#define STATE3 %xmm5
@@ -32,12 +100,16 @@
#define IN IN1
#define KEY %xmm2
#define IV %xmm3
+
#define BSWAP_MASK %xmm10
#define CTR %xmm11
#define INC %xmm12
+#ifdef __x86_64__
+#define AREG %rax
#define KEYP %rdi
#define OUTP %rsi
+#define UKEYP OUTP
#define INP %rdx
#define LEN %rcx
#define IVP %r8
@@ -46,6 +118,1588 @@
#define TKEYP T1
#define T2 %r11
#define TCTR_LOW T2
+#else
+#define AREG %eax
+#define KEYP %edi
+#define OUTP AREG
+#define UKEYP OUTP
+#define INP %edx
+#define LEN %esi
+#define IVP %ebp
+#define KLEN %ebx
+#define T1 %ecx
+#define TKEYP T1
+#endif
+
+
+#ifdef __x86_64__
+/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
+*
+*
+* Input: A and B (128-bits each, bit-reflected)
+* Output: C = A*B*x mod poly, (i.e. >>1 )
+* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
+* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
+*
+*/
+.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
+ movdqa \GH, \TMP1
+ pshufd $78, \GH, \TMP2
+ pshufd $78, \HK, \TMP3
+ pxor \GH, \TMP2 # TMP2 = a1+a0
+ pxor \HK, \TMP3 # TMP3 = b1+b0
+ PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
+ PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
+ PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
+ pxor \GH, \TMP2
+ pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
+ movdqa \TMP2, \TMP3
+ pslldq $8, \TMP3 # left shift TMP3 2 DWs
+ psrldq $8, \TMP2 # right shift TMP2 2 DWs
+ pxor \TMP3, \GH
+ pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK
+
+ # first phase of the reduction
+
+ movdqa \GH, \TMP2
+ movdqa \GH, \TMP3
+ movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4
+ # in in order to perform
+ # independent shifts
+ pslld $31, \TMP2 # packed right shift <<31
+ pslld $30, \TMP3 # packed right shift <<30
+ pslld $25, \TMP4 # packed right shift <<25
+ pxor \TMP3, \TMP2 # xor the shifted versions
+ pxor \TMP4, \TMP2
+ movdqa \TMP2, \TMP5
+ psrldq $4, \TMP5 # right shift TMP5 1 DW
+ pslldq $12, \TMP2 # left shift TMP2 3 DWs
+ pxor \TMP2, \GH
+
+ # second phase of the reduction
+
+ movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4
+ # in in order to perform
+ # independent shifts
+ movdqa \GH,\TMP3
+ movdqa \GH,\TMP4
+ psrld $1,\TMP2 # packed left shift >>1
+ psrld $2,\TMP3 # packed left shift >>2
+ psrld $7,\TMP4 # packed left shift >>7
+ pxor \TMP3,\TMP2 # xor the shifted versions
+ pxor \TMP4,\TMP2
+ pxor \TMP5, \TMP2
+ pxor \TMP2, \GH
+ pxor \TMP1, \GH # result is in TMP1
+.endm
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+ mov arg7, %r10 # %r10 = AAD
+ mov arg8, %r12 # %r12 = aadLen
+ mov %r12, %r11
+ pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+ movd (%r10), \TMP1
+ pslldq $12, \TMP1
+ psrldq $4, %xmm\i
+ pxor \TMP1, %xmm\i
+ add $4, %r10
+ sub $4, %r12
+ jne _get_AAD_loop\num_initial_blocks\operation
+ cmp $16, %r11
+ je _get_AAD_loop2_done\num_initial_blocks\operation
+ mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+ psrldq $4, %xmm\i
+ sub $4, %r12
+ cmp %r11, %r12
+ jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+
+ xor %r11, %r11 # initialise the data pointer offset as zero
+
+ # start AES for num_initial_blocks blocks
+
+ mov %arg5, %rax # %rax = *Y0
+ movdqu (%rax), \XMM0 # XMM0 = Y0
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, %xmm\index
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+ pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+ movaps 0x10(%rdi), \TMP1
+ AESENC \TMP1, %xmm\index # Round 1
+.endr
+.irpc index, \i_seq
+ movaps 0x20(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x30(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x40(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x50(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x60(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x70(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x80(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x90(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0xa0(%arg1), \TMP1
+ AESENCLAST \TMP1, %xmm\index # Round 10
+.endr
+.irpc index, \i_seq
+ movdqu (%arg3 , %r11, 1), \TMP1
+ pxor \TMP1, %xmm\index
+ movdqu %xmm\index, (%arg2 , %r11, 1)
+ # write back plaintext/ciphertext for num_initial_blocks
+ add $16, %r11
+
+ movdqa \TMP1, %xmm\index
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\index
+
+ # prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+ GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ # apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+ pxor %xmm5, %xmm6
+ GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm6, %xmm7
+ GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+ pxor %xmm6, %xmm7
+ GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+ cmp $64, %r13
+ jl _initial_blocks_done\num_initial_blocks\operation
+ # no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
+*/
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM1
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM2
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM3
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM4
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+ pxor 16*0(%arg1), \XMM1
+ pxor 16*0(%arg1), \XMM2
+ pxor 16*0(%arg1), \XMM3
+ pxor 16*0(%arg1), \XMM4
+ movdqa \TMP3, \TMP5
+ pshufd $78, \TMP3, \TMP1
+ pxor \TMP3, \TMP1
+ movdqa \TMP1, HashKey_k(%rsp)
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+ movdqa \TMP5, HashKey_2(%rsp)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+ movaps 0x10*\index(%arg1), \TMP1
+ AESENC \TMP1, \XMM1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+.endr
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_3(%rsp)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+ movaps 0x10*\index(%arg1), \TMP1
+ AESENC \TMP1, \XMM1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+.endr
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_4(%rsp)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_4_k(%rsp)
+ movaps 0xa0(%arg1), \TMP2
+ AESENCLAST \TMP2, \XMM1
+ AESENCLAST \TMP2, \XMM2
+ AESENCLAST \TMP2, \XMM3
+ AESENCLAST \TMP2, \XMM4
+ movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM1
+ movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+ movdqa \TMP1, \XMM1
+ movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM2
+ movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+ movdqa \TMP1, \XMM2
+ movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM3
+ movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+ movdqa \TMP1, \XMM3
+ movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM4
+ movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+ movdqa \TMP1, \XMM4
+ add $64, %r11
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+ pxor \XMMDst, \XMM1
+# combine GHASHed value with the corresponding ciphertext
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+ mov arg7, %r10 # %r10 = AAD
+ mov arg8, %r12 # %r12 = aadLen
+ mov %r12, %r11
+ pxor %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+ movd (%r10), \TMP1
+ pslldq $12, \TMP1
+ psrldq $4, %xmm\i
+ pxor \TMP1, %xmm\i
+ add $4, %r10
+ sub $4, %r12
+ jne _get_AAD_loop\num_initial_blocks\operation
+ cmp $16, %r11
+ je _get_AAD_loop2_done\num_initial_blocks\operation
+ mov $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+ psrldq $4, %xmm\i
+ sub $4, %r12
+ cmp %r11, %r12
+ jne _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+
+ xor %r11, %r11 # initialise the data pointer offset as zero
+
+ # start AES for num_initial_blocks blocks
+
+ mov %arg5, %rax # %rax = *Y0
+ movdqu (%rax), \XMM0 # XMM0 = Y0
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, %xmm\index
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+ pxor 16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+ movaps 0x10(%rdi), \TMP1
+ AESENC \TMP1, %xmm\index # Round 1
+.endr
+.irpc index, \i_seq
+ movaps 0x20(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x30(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x40(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x50(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x60(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x70(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x80(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0x90(%arg1), \TMP1
+ AESENC \TMP1, %xmm\index # Round 2
+.endr
+.irpc index, \i_seq
+ movaps 0xa0(%arg1), \TMP1
+ AESENCLAST \TMP1, %xmm\index # Round 10
+.endr
+.irpc index, \i_seq
+ movdqu (%arg3 , %r11, 1), \TMP1
+ pxor \TMP1, %xmm\index
+ movdqu %xmm\index, (%arg2 , %r11, 1)
+ # write back plaintext/ciphertext for num_initial_blocks
+ add $16, %r11
+
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, %xmm\index
+
+ # prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+ GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ # apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+ pxor %xmm5, %xmm6
+ GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm6, %xmm7
+ GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+ pxor %xmm6, %xmm7
+ GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+ pxor %xmm7, %xmm8
+ GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+ cmp $64, %r13
+ jl _initial_blocks_done\num_initial_blocks\operation
+ # no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
+* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
+*/
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM1
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM2
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM3
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+
+ paddd ONE(%rip), \XMM0 # INCR Y0
+ movdqa \XMM0, \XMM4
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+ pxor 16*0(%arg1), \XMM1
+ pxor 16*0(%arg1), \XMM2
+ pxor 16*0(%arg1), \XMM3
+ pxor 16*0(%arg1), \XMM4
+ movdqa \TMP3, \TMP5
+ pshufd $78, \TMP3, \TMP1
+ pxor \TMP3, \TMP1
+ movdqa \TMP1, HashKey_k(%rsp)
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+ movdqa \TMP5, HashKey_2(%rsp)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+ movaps 0x10*\index(%arg1), \TMP1
+ AESENC \TMP1, \XMM1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+.endr
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_3(%rsp)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+ movaps 0x10*\index(%arg1), \TMP1
+ AESENC \TMP1, \XMM1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+.endr
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_4(%rsp)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_4_k(%rsp)
+ movaps 0xa0(%arg1), \TMP2
+ AESENCLAST \TMP2, \XMM1
+ AESENCLAST \TMP2, \XMM2
+ AESENCLAST \TMP2, \XMM3
+ AESENCLAST \TMP2, \XMM4
+ movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM1
+ movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM2
+ movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM3
+ movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+ pxor \TMP1, \XMM4
+ movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
+ movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
+ movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
+ movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+
+ add $64, %r11
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
+ pxor \XMMDst, \XMM1
+# combine GHASHed value with the corresponding ciphertext
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
+ movdqa SHUF_MASK(%rip), %xmm14
+ PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+
+/*
+* encrypt 4 blocks at a time
+* ghash the 4 previously encrypted ciphertext blocks
+* arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+ movdqa \XMM1, \XMM5
+ movdqa \XMM2, \XMM6
+ movdqa \XMM3, \XMM7
+ movdqa \XMM4, \XMM8
+
+ movdqa SHUF_MASK(%rip), %xmm15
+ # multiply TMP5 * HashKey using karatsuba
+
+ movdqa \XMM5, \TMP4
+ pshufd $78, \XMM5, \TMP6
+ pxor \XMM5, \TMP6
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa HashKey_4(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+ movdqa \XMM0, \XMM1
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM2
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM3
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM4
+ PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+ PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+ PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+ pxor (%arg1), \XMM1
+ pxor (%arg1), \XMM2
+ pxor (%arg1), \XMM3
+ pxor (%arg1), \XMM4
+ movdqa HashKey_4_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+ movaps 0x10(%arg1), \TMP1
+ AESENC \TMP1, \XMM1 # Round 1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+ movaps 0x20(%arg1), \TMP1
+ AESENC \TMP1, \XMM1 # Round 2
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+ movdqa \XMM6, \TMP1
+ pshufd $78, \XMM6, \TMP2
+ pxor \XMM6, \TMP2
+ movdqa HashKey_3(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+ movaps 0x30(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 3
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+ movaps 0x40(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 4
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ movdqa HashKey_3_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movaps 0x50(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 5
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ pxor \TMP1, \TMP4
+# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+ pxor \XMM6, \XMM5
+ pxor \TMP2, \TMP6
+ movdqa \XMM7, \TMP1
+ pshufd $78, \XMM7, \TMP2
+ pxor \XMM7, \TMP2
+ movdqa HashKey_2(%rsp ), \TMP5
+
+ # Multiply TMP5 * HashKey using karatsuba
+
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ movaps 0x60(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 6
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+ movaps 0x70(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 7
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ movdqa HashKey_2_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movaps 0x80(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 8
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ pxor \TMP1, \TMP4
+# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+ pxor \XMM7, \XMM5
+ pxor \TMP2, \TMP6
+
+ # Multiply XMM8 * HashKey
+ # XMM8 and TMP5 hold the values for the two operands
+
+ movdqa \XMM8, \TMP1
+ pshufd $78, \XMM8, \TMP2
+ pxor \XMM8, \TMP2
+ movdqa HashKey(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ movaps 0x90(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 9
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+ movaps 0xa0(%arg1), \TMP3
+ AESENCLAST \TMP3, \XMM1 # Round 10
+ AESENCLAST \TMP3, \XMM2
+ AESENCLAST \TMP3, \XMM3
+ AESENCLAST \TMP3, \XMM4
+ movdqa HashKey_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movdqu (%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
+ movdqu 16(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
+ movdqu 32(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
+ movdqu 48(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
+ movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer
+ PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+ pxor \TMP4, \TMP1
+ pxor \XMM8, \XMM5
+ pxor \TMP6, \TMP2
+ pxor \TMP1, \TMP2
+ pxor \XMM5, \TMP2
+ movdqa \TMP2, \TMP3
+ pslldq $8, \TMP3 # left shift TMP3 2 DWs
+ psrldq $8, \TMP2 # right shift TMP2 2 DWs
+ pxor \TMP3, \XMM5
+ pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
+
+ # first phase of reduction
+
+ movdqa \XMM5, \TMP2
+ movdqa \XMM5, \TMP3
+ movdqa \XMM5, \TMP4
+# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+ pslld $31, \TMP2 # packed right shift << 31
+ pslld $30, \TMP3 # packed right shift << 30
+ pslld $25, \TMP4 # packed right shift << 25
+ pxor \TMP3, \TMP2 # xor the shifted versions
+ pxor \TMP4, \TMP2
+ movdqa \TMP2, \TMP5
+ psrldq $4, \TMP5 # right shift T5 1 DW
+ pslldq $12, \TMP2 # left shift T2 3 DWs
+ pxor \TMP2, \XMM5
+
+ # second phase of reduction
+
+ movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+ movdqa \XMM5,\TMP3
+ movdqa \XMM5,\TMP4
+ psrld $1, \TMP2 # packed left shift >>1
+ psrld $2, \TMP3 # packed left shift >>2
+ psrld $7, \TMP4 # packed left shift >>7
+ pxor \TMP3,\TMP2 # xor the shifted versions
+ pxor \TMP4,\TMP2
+ pxor \TMP5, \TMP2
+ pxor \TMP2, \XMM5
+ pxor \TMP1, \XMM5 # result is in TMP1
+
+ pxor \XMM5, \XMM1
+.endm
+
+/*
+* decrypt 4 blocks at a time
+* ghash the 4 previously decrypted ciphertext blocks
+* arg1, %arg2, %arg3 are used as pointers only, not modified
+* %r11 is the data offset value
+*/
+.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
+TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
+
+ movdqa \XMM1, \XMM5
+ movdqa \XMM2, \XMM6
+ movdqa \XMM3, \XMM7
+ movdqa \XMM4, \XMM8
+
+ movdqa SHUF_MASK(%rip), %xmm15
+ # multiply TMP5 * HashKey using karatsuba
+
+ movdqa \XMM5, \TMP4
+ pshufd $78, \XMM5, \TMP6
+ pxor \XMM5, \TMP6
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa HashKey_4(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
+ movdqa \XMM0, \XMM1
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM2
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM3
+ paddd ONE(%rip), \XMM0 # INCR CNT
+ movdqa \XMM0, \XMM4
+ PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+ PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
+ PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+ pxor (%arg1), \XMM1
+ pxor (%arg1), \XMM2
+ pxor (%arg1), \XMM3
+ pxor (%arg1), \XMM4
+ movdqa HashKey_4_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
+ movaps 0x10(%arg1), \TMP1
+ AESENC \TMP1, \XMM1 # Round 1
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+ movaps 0x20(%arg1), \TMP1
+ AESENC \TMP1, \XMM1 # Round 2
+ AESENC \TMP1, \XMM2
+ AESENC \TMP1, \XMM3
+ AESENC \TMP1, \XMM4
+ movdqa \XMM6, \TMP1
+ pshufd $78, \XMM6, \TMP2
+ pxor \XMM6, \TMP2
+ movdqa HashKey_3(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
+ movaps 0x30(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 3
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
+ movaps 0x40(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 4
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ movdqa HashKey_3_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movaps 0x50(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 5
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ pxor \TMP1, \TMP4
+# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+ pxor \XMM6, \XMM5
+ pxor \TMP2, \TMP6
+ movdqa \XMM7, \TMP1
+ pshufd $78, \XMM7, \TMP2
+ pxor \XMM7, \TMP2
+ movdqa HashKey_2(%rsp ), \TMP5
+
+ # Multiply TMP5 * HashKey using karatsuba
+
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ movaps 0x60(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 6
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
+ movaps 0x70(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 7
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ movdqa HashKey_2_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movaps 0x80(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 8
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ pxor \TMP1, \TMP4
+# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
+ pxor \XMM7, \XMM5
+ pxor \TMP2, \TMP6
+
+ # Multiply XMM8 * HashKey
+ # XMM8 and TMP5 hold the values for the two operands
+
+ movdqa \XMM8, \TMP1
+ pshufd $78, \XMM8, \TMP2
+ pxor \XMM8, \TMP2
+ movdqa HashKey(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ movaps 0x90(%arg1), \TMP3
+ AESENC \TMP3, \XMM1 # Round 9
+ AESENC \TMP3, \XMM2
+ AESENC \TMP3, \XMM3
+ AESENC \TMP3, \XMM4
+ PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
+ movaps 0xa0(%arg1), \TMP3
+ AESENCLAST \TMP3, \XMM1 # Round 10
+ AESENCLAST \TMP3, \XMM2
+ AESENCLAST \TMP3, \XMM3
+ AESENCLAST \TMP3, \XMM4
+ movdqa HashKey_k(%rsp), \TMP5
+ PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movdqu (%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
+ movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer
+ movdqa \TMP3, \XMM1
+ movdqu 16(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
+ movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer
+ movdqa \TMP3, \XMM2
+ movdqu 32(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
+ movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer
+ movdqa \TMP3, \XMM3
+ movdqu 48(%arg3,%r11,1), \TMP3
+ pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
+ movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer
+ movdqa \TMP3, \XMM4
+ PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
+ PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
+
+ pxor \TMP4, \TMP1
+ pxor \XMM8, \XMM5
+ pxor \TMP6, \TMP2
+ pxor \TMP1, \TMP2
+ pxor \XMM5, \TMP2
+ movdqa \TMP2, \TMP3
+ pslldq $8, \TMP3 # left shift TMP3 2 DWs
+ psrldq $8, \TMP2 # right shift TMP2 2 DWs
+ pxor \TMP3, \XMM5
+ pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
+
+ # first phase of reduction
+
+ movdqa \XMM5, \TMP2
+ movdqa \XMM5, \TMP3
+ movdqa \XMM5, \TMP4
+# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
+ pslld $31, \TMP2 # packed right shift << 31
+ pslld $30, \TMP3 # packed right shift << 30
+ pslld $25, \TMP4 # packed right shift << 25
+ pxor \TMP3, \TMP2 # xor the shifted versions
+ pxor \TMP4, \TMP2
+ movdqa \TMP2, \TMP5
+ psrldq $4, \TMP5 # right shift T5 1 DW
+ pslldq $12, \TMP2 # left shift T2 3 DWs
+ pxor \TMP2, \XMM5
+
+ # second phase of reduction
+
+ movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
+ movdqa \XMM5,\TMP3
+ movdqa \XMM5,\TMP4
+ psrld $1, \TMP2 # packed left shift >>1
+ psrld $2, \TMP3 # packed left shift >>2
+ psrld $7, \TMP4 # packed left shift >>7
+ pxor \TMP3,\TMP2 # xor the shifted versions
+ pxor \TMP4,\TMP2
+ pxor \TMP5, \TMP2
+ pxor \TMP2, \XMM5
+ pxor \TMP1, \XMM5 # result is in TMP1
+
+ pxor \XMM5, \XMM1
+.endm
+
+/* GHASH the last 4 ciphertext blocks. */
+.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
+TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
+
+ # Multiply TMP6 * HashKey (using Karatsuba)
+
+ movdqa \XMM1, \TMP6
+ pshufd $78, \XMM1, \TMP2
+ pxor \XMM1, \TMP2
+ movdqa HashKey_4(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
+ PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
+ movdqa HashKey_4_k(%rsp), \TMP4
+ PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ movdqa \XMM1, \XMMDst
+ movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
+
+ # Multiply TMP1 * HashKey (using Karatsuba)
+
+ movdqa \XMM2, \TMP1
+ pshufd $78, \XMM2, \TMP2
+ pxor \XMM2, \TMP2
+ movdqa HashKey_3(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
+ movdqa HashKey_3_k(%rsp), \TMP4
+ PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pxor \TMP1, \TMP6
+ pxor \XMM2, \XMMDst
+ pxor \TMP2, \XMM1
+# results accumulated in TMP6, XMMDst, XMM1
+
+ # Multiply TMP1 * HashKey (using Karatsuba)
+
+ movdqa \XMM3, \TMP1
+ pshufd $78, \XMM3, \TMP2
+ pxor \XMM3, \TMP2
+ movdqa HashKey_2(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
+ movdqa HashKey_2_k(%rsp), \TMP4
+ PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pxor \TMP1, \TMP6
+ pxor \XMM3, \XMMDst
+ pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
+
+ # Multiply TMP1 * HashKey (using Karatsuba)
+ movdqa \XMM4, \TMP1
+ pshufd $78, \XMM4, \TMP2
+ pxor \XMM4, \TMP2
+ movdqa HashKey(%rsp), \TMP5
+ PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
+ PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
+ movdqa HashKey_k(%rsp), \TMP4
+ PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
+ pxor \TMP1, \TMP6
+ pxor \XMM4, \XMMDst
+ pxor \XMM1, \TMP2
+ pxor \TMP6, \TMP2
+ pxor \XMMDst, \TMP2
+ # middle section of the temp results combined as in karatsuba algorithm
+ movdqa \TMP2, \TMP4
+ pslldq $8, \TMP4 # left shift TMP4 2 DWs
+ psrldq $8, \TMP2 # right shift TMP2 2 DWs
+ pxor \TMP4, \XMMDst
+ pxor \TMP2, \TMP6
+# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
+ # first phase of the reduction
+ movdqa \XMMDst, \TMP2
+ movdqa \XMMDst, \TMP3
+ movdqa \XMMDst, \TMP4
+# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
+ pslld $31, \TMP2 # packed right shifting << 31
+ pslld $30, \TMP3 # packed right shifting << 30
+ pslld $25, \TMP4 # packed right shifting << 25
+ pxor \TMP3, \TMP2 # xor the shifted versions
+ pxor \TMP4, \TMP2
+ movdqa \TMP2, \TMP7
+ psrldq $4, \TMP7 # right shift TMP7 1 DW
+ pslldq $12, \TMP2 # left shift TMP2 3 DWs
+ pxor \TMP2, \XMMDst
+
+ # second phase of the reduction
+ movdqa \XMMDst, \TMP2
+ # make 3 copies of XMMDst for doing 3 shift operations
+ movdqa \XMMDst, \TMP3
+ movdqa \XMMDst, \TMP4
+ psrld $1, \TMP2 # packed left shift >> 1
+ psrld $2, \TMP3 # packed left shift >> 2
+ psrld $7, \TMP4 # packed left shift >> 7
+ pxor \TMP3, \TMP2 # xor the shifted versions
+ pxor \TMP4, \TMP2
+ pxor \TMP7, \TMP2
+ pxor \TMP2, \XMMDst
+ pxor \TMP6, \XMMDst # reduced result is in XMMDst
+.endm
+
+/* Encryption of a single block done*/
+.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
+
+ pxor (%arg1), \XMM0
+ movaps 16(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 32(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 48(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 64(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 80(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 96(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 112(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 128(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 144(%arg1), \TMP1
+ AESENC \TMP1, \XMM0
+ movaps 160(%arg1), \TMP1
+ AESENCLAST \TMP1, \XMM0
+.endm
+
+
+/*****************************************************************************
+* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* u8 *out, // Plaintext output. Encrypt in-place is allowed.
+* const u8 *in, // Ciphertext input
+* u64 plaintext_len, // Length of data in bytes for decryption.
+* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
+* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+* // concatenated with 0x00000001. 16-byte aligned pointer.
+* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
+* const u8 *aad, // Additional Authentication Data (AAD)
+* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
+* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the
+* // given authentication tag and only return the plaintext if they match.
+* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
+* // (most likely), 12 or 8.
+*
+* Assumptions:
+*
+* keys:
+* keys are pre-expanded and aligned to 16 bytes. we are using the first
+* set of 11 keys in the data structure void *aes_ctx
+*
+* iv:
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | Salt (From the SA) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | Initialization Vector |
+* | (This is the sequence number from IPSec header) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x1 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+*
+*
+* AAD:
+* AAD padded to 128 bits with 0
+* for example, assume AAD is a u32 vector
+*
+* if AAD is 8 bytes:
+* AAD[3] = {A0, A1};
+* padded AAD in xmm register = {A1 A0 0 0}
+*
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | SPI (A1) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 32-bit Sequence Number (A0) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x0 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+* AAD Format with 32-bit Sequence Number
+*
+* if AAD is 12 bytes:
+* AAD[3] = {A0, A1, A2};
+* padded AAD in xmm register = {A2 A1 A0 0}
+*
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | SPI (A2) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 64-bit Extended Sequence Number {A1,A0} |
+* | |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x0 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+* AAD Format with 64-bit Extended Sequence Number
+*
+* aadLen:
+* from the definition of the spec, aadLen can only be 8 or 12 bytes.
+* The code supports 16 too but for other sizes, the code will fail.
+*
+* TLen:
+* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
+* For other sizes, the code will fail.
+*
+* poly = x^128 + x^127 + x^126 + x^121 + 1
+*
+*****************************************************************************/
+
+ENTRY(aesni_gcm_dec)
+ push %r12
+ push %r13
+ push %r14
+ mov %rsp, %r14
+/*
+* states of %xmm registers %xmm6:%xmm15 not saved
+* all %xmm registers are clobbered
+*/
+ sub $VARIABLE_OFFSET, %rsp
+ and $~63, %rsp # align rsp to 64 bytes
+ mov %arg6, %r12
+ movdqu (%r12), %xmm13 # %xmm13 = HashKey
+ movdqa SHUF_MASK(%rip), %xmm2
+ PSHUFB_XMM %xmm2, %xmm13
+
+
+# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
+
+ movdqa %xmm13, %xmm2
+ psllq $1, %xmm13
+ psrlq $63, %xmm2
+ movdqa %xmm2, %xmm1
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ por %xmm2, %xmm13
+
+ # Reduction
+
+ pshufd $0x24, %xmm1, %xmm2
+ pcmpeqd TWOONE(%rip), %xmm2
+ pand POLY(%rip), %xmm2
+ pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly)
+
+
+ # Decrypt first few blocks
+
+ movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly)
+ mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext
+ and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
+ mov %r13, %r12
+ and $(3<<4), %r12
+ jz _initial_num_blocks_is_0_decrypt
+ cmp $(2<<4), %r12
+ jb _initial_num_blocks_is_1_decrypt
+ je _initial_num_blocks_is_2_decrypt
+_initial_num_blocks_is_3_decrypt:
+ INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
+ sub $48, %r13
+ jmp _initial_blocks_decrypted
+_initial_num_blocks_is_2_decrypt:
+ INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
+ sub $32, %r13
+ jmp _initial_blocks_decrypted
+_initial_num_blocks_is_1_decrypt:
+ INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
+ sub $16, %r13
+ jmp _initial_blocks_decrypted
+_initial_num_blocks_is_0_decrypt:
+ INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
+_initial_blocks_decrypted:
+ cmp $0, %r13
+ je _zero_cipher_left_decrypt
+ sub $64, %r13
+ je _four_cipher_left_decrypt
+_decrypt_by_4:
+ GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
+%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
+ add $64, %r11
+ sub $64, %r13
+ jne _decrypt_by_4
+_four_cipher_left_decrypt:
+ GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_decrypt:
+ mov %arg4, %r13
+ and $15, %r13 # %r13 = arg4 (mod 16)
+ je _multiple_of_16_bytes_decrypt
+
+ # Handle the last <16 byte block seperately
+
+ paddd ONE(%rip), %xmm0 # increment CNT to get Yn
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
+ sub $16, %r11
+ add %r13, %r11
+ movdqu (%arg3,%r11,1), %xmm1 # recieve the last <16 byte block
+ lea SHIFT_MASK+16(%rip), %r12
+ sub %r13, %r12
+# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
+# (%r13 is the number of bytes in plaintext mod 16)
+ movdqu (%r12), %xmm2 # get the appropriate shuffle mask
+ PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes
+
+ movdqa %xmm1, %xmm2
+ pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
+ pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
+ pand %xmm1, %xmm2
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10 ,%xmm2
+
+ pxor %xmm2, %xmm8
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ # GHASH computation for the last <16 byte block
+ sub %r13, %r11
+ add $16, %r11
+
+ # output %r13 bytes
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_decrypt
+ mov %rax, (%arg2 , %r11, 1)
+ add $8, %r11
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_decrypt:
+ mov %al, (%arg2, %r11, 1)
+ add $1, %r11
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_decrypt
+_multiple_of_16_bytes_decrypt:
+ mov arg8, %r12 # %r13 = aadLen (number of bytes)
+ shl $3, %r12 # convert into number of bits
+ movd %r12d, %xmm15 # len(A) in %xmm15
+ shl $3, %arg4 # len(C) in bits (*128)
+ MOVQ_R64_XMM %arg4, %xmm1
+ pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
+ pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
+ pxor %xmm15, %xmm8
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ # final GHASH computation
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm8
+
+ mov %arg5, %rax # %rax = *Y0
+ movdqu (%rax), %xmm0 # %xmm0 = Y0
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
+ pxor %xmm8, %xmm0
+_return_T_decrypt:
+ mov arg9, %r10 # %r10 = authTag
+ mov arg10, %r11 # %r11 = auth_tag_len
+ cmp $16, %r11
+ je _T_16_decrypt
+ cmp $12, %r11
+ je _T_12_decrypt
+_T_8_decrypt:
+ MOVQ_R64_XMM %xmm0, %rax
+ mov %rax, (%r10)
+ jmp _return_T_done_decrypt
+_T_12_decrypt:
+ MOVQ_R64_XMM %xmm0, %rax
+ mov %rax, (%r10)
+ psrldq $8, %xmm0
+ movd %xmm0, %eax
+ mov %eax, 8(%r10)
+ jmp _return_T_done_decrypt
+_T_16_decrypt:
+ movdqu %xmm0, (%r10)
+_return_T_done_decrypt:
+ mov %r14, %rsp
+ pop %r14
+ pop %r13
+ pop %r12
+ ret
+
+
+/*****************************************************************************
+* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* u8 *out, // Ciphertext output. Encrypt in-place is allowed.
+* const u8 *in, // Plaintext input
+* u64 plaintext_len, // Length of data in bytes for encryption.
+* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
+* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+* // concatenated with 0x00000001. 16-byte aligned pointer.
+* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
+* const u8 *aad, // Additional Authentication Data (AAD)
+* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
+* u8 *auth_tag, // Authenticated Tag output.
+* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
+* // 12 or 8.
+*
+* Assumptions:
+*
+* keys:
+* keys are pre-expanded and aligned to 16 bytes. we are using the
+* first set of 11 keys in the data structure void *aes_ctx
+*
+*
+* iv:
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | Salt (From the SA) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | Initialization Vector |
+* | (This is the sequence number from IPSec header) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x1 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+*
+*
+* AAD:
+* AAD padded to 128 bits with 0
+* for example, assume AAD is a u32 vector
+*
+* if AAD is 8 bytes:
+* AAD[3] = {A0, A1};
+* padded AAD in xmm register = {A1 A0 0 0}
+*
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | SPI (A1) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 32-bit Sequence Number (A0) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x0 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+* AAD Format with 32-bit Sequence Number
+*
+* if AAD is 12 bytes:
+* AAD[3] = {A0, A1, A2};
+* padded AAD in xmm register = {A2 A1 A0 0}
+*
+* 0 1 2 3
+* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | SPI (A2) |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 64-bit Extended Sequence Number {A1,A0} |
+* | |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+* | 0x0 |
+* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*
+* AAD Format with 64-bit Extended Sequence Number
+*
+* aadLen:
+* from the definition of the spec, aadLen can only be 8 or 12 bytes.
+* The code supports 16 too but for other sizes, the code will fail.
+*
+* TLen:
+* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
+* For other sizes, the code will fail.
+*
+* poly = x^128 + x^127 + x^126 + x^121 + 1
+***************************************************************************/
+ENTRY(aesni_gcm_enc)
+ push %r12
+ push %r13
+ push %r14
+ mov %rsp, %r14
+#
+# states of %xmm registers %xmm6:%xmm15 not saved
+# all %xmm registers are clobbered
+#
+ sub $VARIABLE_OFFSET, %rsp
+ and $~63, %rsp
+ mov %arg6, %r12
+ movdqu (%r12), %xmm13
+ movdqa SHUF_MASK(%rip), %xmm2
+ PSHUFB_XMM %xmm2, %xmm13
+
+
+# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+ movdqa %xmm13, %xmm2
+ psllq $1, %xmm13
+ psrlq $63, %xmm2
+ movdqa %xmm2, %xmm1
+ pslldq $8, %xmm2
+ psrldq $8, %xmm1
+ por %xmm2, %xmm13
+
+ # reduce HashKey<<1
+
+ pshufd $0x24, %xmm1, %xmm2
+ pcmpeqd TWOONE(%rip), %xmm2
+ pand POLY(%rip), %xmm2
+ pxor %xmm2, %xmm13
+ movdqa %xmm13, HashKey(%rsp)
+ mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly)
+ and $-16, %r13
+ mov %r13, %r12
+
+ # Encrypt first few blocks
+
+ and $(3<<4), %r12
+ jz _initial_num_blocks_is_0_encrypt
+ cmp $(2<<4), %r12
+ jb _initial_num_blocks_is_1_encrypt
+ je _initial_num_blocks_is_2_encrypt
+_initial_num_blocks_is_3_encrypt:
+ INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
+ sub $48, %r13
+ jmp _initial_blocks_encrypted
+_initial_num_blocks_is_2_encrypt:
+ INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
+ sub $32, %r13
+ jmp _initial_blocks_encrypted
+_initial_num_blocks_is_1_encrypt:
+ INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
+ sub $16, %r13
+ jmp _initial_blocks_encrypted
+_initial_num_blocks_is_0_encrypt:
+ INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
+_initial_blocks_encrypted:
+
+ # Main loop - Encrypt remaining blocks
+
+ cmp $0, %r13
+ je _zero_cipher_left_encrypt
+ sub $64, %r13
+ je _four_cipher_left_encrypt
+_encrypt_by_4_encrypt:
+ GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
+%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
+ add $64, %r11
+ sub $64, %r13
+ jne _encrypt_by_4_encrypt
+_four_cipher_left_encrypt:
+ GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_encrypt:
+ mov %arg4, %r13
+ and $15, %r13 # %r13 = arg4 (mod 16)
+ je _multiple_of_16_bytes_encrypt
+
+ # Handle the last <16 Byte block seperately
+ paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
+ sub $16, %r11
+ add %r13, %r11
+ movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks
+ lea SHIFT_MASK+16(%rip), %r12
+ sub %r13, %r12
+ # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+ # (%r13 is the number of bytes in plaintext mod 16)
+ movdqu (%r12), %xmm2 # get the appropriate shuffle mask
+ PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte
+ pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out top 16-r13 bytes of xmm0
+ pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10,%xmm0
+
+ pxor %xmm0, %xmm8
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ # GHASH computation for the last <16 byte block
+ sub %r13, %r11
+ add $16, %r11
+ PSHUFB_XMM %xmm10, %xmm1
+
+ # shuffle xmm0 back to output as ciphertext
+
+ # Output %r13 bytes
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_encrypt
+ mov %rax, (%arg2 , %r11, 1)
+ add $8, %r11
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_encrypt:
+ mov %al, (%arg2, %r11, 1)
+ add $1, %r11
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_encrypt
+_multiple_of_16_bytes_encrypt:
+ mov arg8, %r12 # %r12 = addLen (number of bytes)
+ shl $3, %r12
+ movd %r12d, %xmm15 # len(A) in %xmm15
+ shl $3, %arg4 # len(C) in bits (*128)
+ MOVQ_R64_XMM %arg4, %xmm1
+ pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
+ pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
+ pxor %xmm15, %xmm8
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ # final GHASH computation
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap
+
+ mov %arg5, %rax # %rax = *Y0
+ movdqu (%rax), %xmm0 # %xmm0 = Y0
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0)
+ pxor %xmm8, %xmm0
+_return_T_encrypt:
+ mov arg9, %r10 # %r10 = authTag
+ mov arg10, %r11 # %r11 = auth_tag_len
+ cmp $16, %r11
+ je _T_16_encrypt
+ cmp $12, %r11
+ je _T_12_encrypt
+_T_8_encrypt:
+ MOVQ_R64_XMM %xmm0, %rax
+ mov %rax, (%r10)
+ jmp _return_T_done_encrypt
+_T_12_encrypt:
+ MOVQ_R64_XMM %xmm0, %rax
+ mov %rax, (%r10)
+ psrldq $8, %xmm0
+ movd %xmm0, %eax
+ mov %eax, 8(%r10)
+ jmp _return_T_done_encrypt
+_T_16_encrypt:
+ movdqu %xmm0, (%r10)
+_return_T_done_encrypt:
+ mov %r14, %rsp
+ pop %r14
+ pop %r13
+ pop %r12
+ ret
+
+#endif
+
_key_expansion_128:
_key_expansion_256a:
@@ -55,10 +1709,11 @@ _key_expansion_256a:
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
- movaps %xmm0, (%rcx)
- add $0x10, %rcx
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
ret
+.align 4
_key_expansion_192a:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
@@ -76,12 +1731,13 @@ _key_expansion_192a:
movaps %xmm0, %xmm1
shufps $0b01000100, %xmm0, %xmm6
- movaps %xmm6, (%rcx)
+ movaps %xmm6, (TKEYP)
shufps $0b01001110, %xmm2, %xmm1
- movaps %xmm1, 16(%rcx)
- add $0x20, %rcx
+ movaps %xmm1, 0x10(TKEYP)
+ add $0x20, TKEYP
ret
+.align 4
_key_expansion_192b:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
@@ -96,10 +1752,11 @@ _key_expansion_192b:
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
- movaps %xmm0, (%rcx)
- add $0x10, %rcx
+ movaps %xmm0, (TKEYP)
+ add $0x10, TKEYP
ret
+.align 4
_key_expansion_256b:
pshufd $0b10101010, %xmm1, %xmm1
shufps $0b00010000, %xmm2, %xmm4
@@ -107,8 +1764,8 @@ _key_expansion_256b:
shufps $0b10001100, %xmm2, %xmm4
pxor %xmm4, %xmm2
pxor %xmm1, %xmm2
- movaps %xmm2, (%rcx)
- add $0x10, %rcx
+ movaps %xmm2, (TKEYP)
+ add $0x10, TKEYP
ret
/*
@@ -116,17 +1773,23 @@ _key_expansion_256b:
* unsigned int key_len)
*/
ENTRY(aesni_set_key)
- movups (%rsi), %xmm0 # user key (first 16 bytes)
- movaps %xmm0, (%rdi)
- lea 0x10(%rdi), %rcx # key addr
- movl %edx, 480(%rdi)
+#ifndef __x86_64__
+ pushl KEYP
+ movl 8(%esp), KEYP # ctx
+ movl 12(%esp), UKEYP # in_key
+ movl 16(%esp), %edx # key_len
+#endif
+ movups (UKEYP), %xmm0 # user key (first 16 bytes)
+ movaps %xmm0, (KEYP)
+ lea 0x10(KEYP), TKEYP # key addr
+ movl %edx, 480(KEYP)
pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
cmp $24, %dl
jb .Lenc_key128
je .Lenc_key192
- movups 0x10(%rsi), %xmm2 # other user key
- movaps %xmm2, (%rcx)
- add $0x10, %rcx
+ movups 0x10(UKEYP), %xmm2 # other user key
+ movaps %xmm2, (TKEYP)
+ add $0x10, TKEYP
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_256a
AESKEYGENASSIST 0x1 %xmm0 %xmm1
@@ -155,7 +1818,7 @@ ENTRY(aesni_set_key)
call _key_expansion_256a
jmp .Ldec_key
.Lenc_key192:
- movq 0x10(%rsi), %xmm2 # other user key
+ movq 0x10(UKEYP), %xmm2 # other user key
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_192a
AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
@@ -195,33 +1858,47 @@ ENTRY(aesni_set_key)
AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
call _key_expansion_128
.Ldec_key:
- sub $0x10, %rcx
- movaps (%rdi), %xmm0
- movaps (%rcx), %xmm1
- movaps %xmm0, 240(%rcx)
- movaps %xmm1, 240(%rdi)
- add $0x10, %rdi
- lea 240-16(%rcx), %rsi
+ sub $0x10, TKEYP
+ movaps (KEYP), %xmm0
+ movaps (TKEYP), %xmm1
+ movaps %xmm0, 240(TKEYP)
+ movaps %xmm1, 240(KEYP)
+ add $0x10, KEYP
+ lea 240-16(TKEYP), UKEYP
.align 4
.Ldec_key_loop:
- movaps (%rdi), %xmm0
+ movaps (KEYP), %xmm0
AESIMC %xmm0 %xmm1
- movaps %xmm1, (%rsi)
- add $0x10, %rdi
- sub $0x10, %rsi
- cmp %rcx, %rdi
+ movaps %xmm1, (UKEYP)
+ add $0x10, KEYP
+ sub $0x10, UKEYP
+ cmp TKEYP, KEYP
jb .Ldec_key_loop
- xor %rax, %rax
+ xor AREG, AREG
+#ifndef __x86_64__
+ popl KEYP
+#endif
ret
/*
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
ENTRY(aesni_enc)
+#ifndef __x86_64__
+ pushl KEYP
+ pushl KLEN
+ movl 12(%esp), KEYP
+ movl 16(%esp), OUTP
+ movl 20(%esp), INP
+#endif
movl 480(KEYP), KLEN # key length
movups (INP), STATE # input
call _aesni_enc1
movups STATE, (OUTP) # output
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+#endif
ret
/*
@@ -236,6 +1913,7 @@ ENTRY(aesni_enc)
* KEY
* TKEYP (T1)
*/
+.align 4
_aesni_enc1:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
@@ -298,6 +1976,7 @@ _aesni_enc1:
* KEY
* TKEYP (T1)
*/
+.align 4
_aesni_enc4:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
@@ -391,11 +2070,22 @@ _aesni_enc4:
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
ENTRY(aesni_dec)
+#ifndef __x86_64__
+ pushl KEYP
+ pushl KLEN
+ movl 12(%esp), KEYP
+ movl 16(%esp), OUTP
+ movl 20(%esp), INP
+#endif
mov 480(KEYP), KLEN # key length
add $240, KEYP
movups (INP), STATE # input
call _aesni_dec1
movups STATE, (OUTP) #output
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+#endif
ret
/*
@@ -410,6 +2100,7 @@ ENTRY(aesni_dec)
* KEY
* TKEYP (T1)
*/
+.align 4
_aesni_dec1:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
@@ -472,6 +2163,7 @@ _aesni_dec1:
* KEY
* TKEYP (T1)
*/
+.align 4
_aesni_dec4:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
@@ -566,6 +2258,15 @@ _aesni_dec4:
* size_t len)
*/
ENTRY(aesni_ecb_enc)
+#ifndef __x86_64__
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl 16(%esp), KEYP
+ movl 20(%esp), OUTP
+ movl 24(%esp), INP
+ movl 28(%esp), LEN
+#endif
test LEN, LEN # check length
jz .Lecb_enc_ret
mov 480(KEYP), KLEN
@@ -602,6 +2303,11 @@ ENTRY(aesni_ecb_enc)
cmp $16, LEN
jge .Lecb_enc_loop1
.Lecb_enc_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+#endif
ret
/*
@@ -609,6 +2315,15 @@ ENTRY(aesni_ecb_enc)
* size_t len);
*/
ENTRY(aesni_ecb_dec)
+#ifndef __x86_64__
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl 16(%esp), KEYP
+ movl 20(%esp), OUTP
+ movl 24(%esp), INP
+ movl 28(%esp), LEN
+#endif
test LEN, LEN
jz .Lecb_dec_ret
mov 480(KEYP), KLEN
@@ -646,6 +2361,11 @@ ENTRY(aesni_ecb_dec)
cmp $16, LEN
jge .Lecb_dec_loop1
.Lecb_dec_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+#endif
ret
/*
@@ -653,6 +2373,17 @@ ENTRY(aesni_ecb_dec)
* size_t len, u8 *iv)
*/
ENTRY(aesni_cbc_enc)
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl 20(%esp), KEYP
+ movl 24(%esp), OUTP
+ movl 28(%esp), INP
+ movl 32(%esp), LEN
+ movl 36(%esp), IVP
+#endif
cmp $16, LEN
jb .Lcbc_enc_ret
mov 480(KEYP), KLEN
@@ -670,6 +2401,12 @@ ENTRY(aesni_cbc_enc)
jge .Lcbc_enc_loop
movups STATE, (IVP)
.Lcbc_enc_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
ret
/*
@@ -677,6 +2414,17 @@ ENTRY(aesni_cbc_enc)
* size_t len, u8 *iv)
*/
ENTRY(aesni_cbc_dec)
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl 20(%esp), KEYP
+ movl 24(%esp), OUTP
+ movl 28(%esp), INP
+ movl 32(%esp), LEN
+ movl 36(%esp), IVP
+#endif
cmp $16, LEN
jb .Lcbc_dec_just_ret
mov 480(KEYP), KLEN
@@ -690,16 +2438,30 @@ ENTRY(aesni_cbc_dec)
movaps IN1, STATE1
movups 0x10(INP), IN2
movaps IN2, STATE2
+#ifdef __x86_64__
movups 0x20(INP), IN3
movaps IN3, STATE3
movups 0x30(INP), IN4
movaps IN4, STATE4
+#else
+ movups 0x20(INP), IN1
+ movaps IN1, STATE3
+ movups 0x30(INP), IN2
+ movaps IN2, STATE4
+#endif
call _aesni_dec4
pxor IV, STATE1
+#ifdef __x86_64__
pxor IN1, STATE2
pxor IN2, STATE3
pxor IN3, STATE4
movaps IN4, IV
+#else
+ pxor (INP), STATE2
+ pxor 0x10(INP), STATE3
+ pxor IN1, STATE4
+ movaps IN2, IV
+#endif
movups STATE1, (OUTP)
movups STATE2, 0x10(OUTP)
movups STATE3, 0x20(OUTP)
@@ -727,8 +2489,15 @@ ENTRY(aesni_cbc_dec)
.Lcbc_dec_ret:
movups IV, (IVP)
.Lcbc_dec_just_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
ret
+#ifdef __x86_64__
.align 16
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@@ -744,6 +2513,7 @@ ENTRY(aesni_cbc_dec)
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
+.align 4
_aesni_inc_init:
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
@@ -768,6 +2538,7 @@ _aesni_inc_init:
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
+.align 4
_aesni_inc:
paddq INC, CTR
add $1, TCTR_LOW
@@ -839,3 +2610,4 @@ ENTRY(aesni_ctr_enc)
movups IV, (IVP)
.Lctr_enc_just_ret:
ret
+#endif
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2cb3dcc4490a..e1e60c7d5813 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -5,6 +5,14 @@
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -21,6 +29,10 @@
#include <crypto/ctr.h>
#include <asm/i387.h>
#include <asm/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
#define HAS_CTR
@@ -42,8 +54,31 @@ struct async_aes_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
-#define AESNI_ALIGN 16
+/* This data is stored at the end of the crypto_tfm struct.
+ * It's a type of per "session" data storage location.
+ * This needs to be 16 byte aligned.
+ */
+struct aesni_rfc4106_gcm_ctx {
+ u8 hash_subkey[16];
+ struct crypto_aes_ctx aes_key_expanded;
+ u8 nonce[4];
+ struct cryptd_aead *cryptd_tfm;
+};
+
+struct aesni_gcm_set_hash_subkey_result {
+ int err;
+ struct completion completion;
+};
+
+struct aesni_hash_subkey_req_data {
+ u8 iv[16];
+ struct aesni_gcm_set_hash_subkey_result result;
+ struct scatterlist sg;
+};
+
+#define AESNI_ALIGN (16)
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
+#define RFC4106_HASH_SUBKEY_SIZE 16
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
@@ -59,9 +94,62 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
+#ifdef CONFIG_X86_64
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
+/* asmlinkage void aesni_gcm_enc()
+ * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * u8 *out, Ciphertext output. Encrypt in-place is allowed.
+ * const u8 *in, Plaintext input
+ * unsigned long plaintext_len, Length of data in bytes for encryption.
+ * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
+ * concatenated with 8 byte Initialisation Vector (from IPSec ESP
+ * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
+ * const u8 *aad, Additional Authentication Data (AAD)
+ * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
+ * is going to be 8 or 12 bytes
+ * u8 *auth_tag, Authenticated Tag output.
+ * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
+ * Valid values are 16 (most likely), 12 or 8.
+ */
+asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
+ const u8 *in, unsigned long plaintext_len, u8 *iv,
+ u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+ u8 *auth_tag, unsigned long auth_tag_len);
+
+/* asmlinkage void aesni_gcm_dec()
+ * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * u8 *out, Plaintext output. Decrypt in-place is allowed.
+ * const u8 *in, Ciphertext input
+ * unsigned long ciphertext_len, Length of data in bytes for decryption.
+ * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
+ * concatenated with 8 byte Initialisation Vector (from IPSec ESP
+ * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
+ * const u8 *aad, Additional Authentication Data (AAD)
+ * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
+ * to be 8 or 12 bytes
+ * u8 *auth_tag, Authenticated Tag output.
+ * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
+ * Valid values are 16 (most likely), 12 or 8.
+ */
+asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
+ const u8 *in, unsigned long ciphertext_len, u8 *iv,
+ u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
+ u8 *auth_tag, unsigned long auth_tag_len);
+
+static inline struct
+aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
+{
+ return
+ (struct aesni_rfc4106_gcm_ctx *)
+ PTR_ALIGN((u8 *)
+ crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN);
+}
+#endif
+
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
unsigned long addr = (unsigned long)raw_ctx;
@@ -324,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = {
},
};
+#ifdef CONFIG_X86_64
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
struct blkcipher_walk *walk)
{
@@ -389,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = {
},
},
};
+#endif
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
@@ -536,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = {
},
};
+#ifdef CONFIG_X86_64
static int ablk_ctr_init(struct crypto_tfm *tfm)
{
struct cryptd_ablkcipher *cryptd_tfm;
@@ -612,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = {
},
};
#endif
+#endif
#ifdef HAS_LRW
static int ablk_lrw_init(struct crypto_tfm *tfm)
@@ -730,6 +822,424 @@ static struct crypto_alg ablk_xts_alg = {
};
#endif
+#ifdef CONFIG_X86_64
+static int rfc4106_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
+ PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
+ cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ctx->cryptd_tfm = cryptd_tfm;
+ tfm->crt_aead.reqsize = sizeof(struct aead_request)
+ + crypto_aead_reqsize(&cryptd_tfm->base);
+ return 0;
+}
+
+static void rfc4106_exit(struct crypto_tfm *tfm)
+{
+ struct aesni_rfc4106_gcm_ctx *ctx =
+ (struct aesni_rfc4106_gcm_ctx *)
+ PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
+ if (!IS_ERR(ctx->cryptd_tfm))
+ cryptd_free_aead(ctx->cryptd_tfm);
+ return;
+}
+
+static void
+rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err)
+{
+ struct aesni_gcm_set_hash_subkey_result *result = req->data;
+
+ if (err == -EINPROGRESS)
+ return;
+ result->err = err;
+ complete(&result->completion);
+}
+
+static int
+rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
+{
+ struct crypto_ablkcipher *ctr_tfm;
+ struct ablkcipher_request *req;
+ int ret = -EINVAL;
+ struct aesni_hash_subkey_req_data *req_data;
+
+ ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
+ if (IS_ERR(ctr_tfm))
+ return PTR_ERR(ctr_tfm);
+
+ crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
+
+ ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
+ if (ret) {
+ crypto_free_ablkcipher(ctr_tfm);
+ return ret;
+ }
+
+ req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
+ if (!req) {
+ crypto_free_ablkcipher(ctr_tfm);
+ return -EINVAL;
+ }
+
+ req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
+ if (!req_data) {
+ crypto_free_ablkcipher(ctr_tfm);
+ return -ENOMEM;
+ }
+ memset(req_data->iv, 0, sizeof(req_data->iv));
+
+ /* Clear the data in the hash sub key container to zero.*/
+ /* We want to cipher all zeros to create the hash sub key. */
+ memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
+
+ init_completion(&req_data->result.completion);
+ sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE);
+ ablkcipher_request_set_tfm(req, ctr_tfm);
+ ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ rfc4106_set_hash_subkey_done,
+ &req_data->result);
+
+ ablkcipher_request_set_crypt(req, &req_data->sg,
+ &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv);
+
+ ret = crypto_ablkcipher_encrypt(req);
+ if (ret == -EINPROGRESS || ret == -EBUSY) {
+ ret = wait_for_completion_interruptible
+ (&req_data->result.completion);
+ if (!ret)
+ ret = req_data->result.err;
+ }
+ ablkcipher_request_free(req);
+ kfree(req_data);
+ crypto_free_ablkcipher(ctr_tfm);
+ return ret;
+}
+
+static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
+ unsigned int key_len)
+{
+ int ret = 0;
+ struct crypto_tfm *tfm = crypto_aead_tfm(parent);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+ u8 *new_key_mem = NULL;
+
+ if (key_len < 4) {
+ crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ /*Account for 4 byte nonce at the end.*/
+ key_len -= 4;
+ if (key_len != AES_KEYSIZE_128) {
+ crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
+ /*This must be on a 16 byte boundary!*/
+ if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN)
+ return -EINVAL;
+
+ if ((unsigned long)key % AESNI_ALIGN) {
+ /*key is not aligned: use an auxuliar aligned pointer*/
+ new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL);
+ if (!new_key_mem)
+ return -ENOMEM;
+
+ new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+ memcpy(new_key_mem, key, key_len);
+ key = new_key_mem;
+ }
+
+ if (!irq_fpu_usable())
+ ret = crypto_aes_expand_key(&(ctx->aes_key_expanded),
+ key, key_len);
+ else {
+ kernel_fpu_begin();
+ ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len);
+ kernel_fpu_end();
+ }
+ /*This must be on a 16 byte boundary!*/
+ if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+exit:
+ kfree(new_key_mem);
+ return ret;
+}
+
+/* This is the Integrity Check Value (aka the authentication tag length and can
+ * be 8, 12 or 16 bytes long. */
+static int rfc4106_set_authsize(struct crypto_aead *parent,
+ unsigned int authsize)
+{
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+ struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+
+ switch (authsize) {
+ case 8:
+ case 12:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+ crypto_aead_crt(parent)->authsize = authsize;
+ crypto_aead_crt(cryptd_child)->authsize = authsize;
+ return 0;
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ int ret;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+
+ if (!irq_fpu_usable()) {
+ struct aead_request *cryptd_req =
+ (struct aead_request *) aead_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_aead_encrypt(cryptd_req);
+ } else {
+ kernel_fpu_begin();
+ ret = cryptd_child->base.crt_aead.encrypt(req);
+ kernel_fpu_end();
+ return ret;
+ }
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ int ret;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+
+ if (!irq_fpu_usable()) {
+ struct aead_request *cryptd_req =
+ (struct aead_request *) aead_request_ctx(req);
+ memcpy(cryptd_req, req, sizeof(*req));
+ aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ return crypto_aead_decrypt(cryptd_req);
+ } else {
+ kernel_fpu_begin();
+ ret = cryptd_child->base.crt_aead.decrypt(req);
+ kernel_fpu_end();
+ return ret;
+ }
+}
+
+static struct crypto_alg rfc4106_alg = {
+ .cra_name = "rfc4106(gcm(aes))",
+ .cra_driver_name = "rfc4106-gcm-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_nivaead_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list),
+ .cra_init = rfc4106_init,
+ .cra_exit = rfc4106_exit,
+ .cra_u = {
+ .aead = {
+ .setkey = rfc4106_set_key,
+ .setauthsize = rfc4106_set_authsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+ .geniv = "seqiv",
+ .ivsize = 8,
+ .maxauthsize = 16,
+ },
+ },
+};
+
+static int __driver_rfc4106_encrypt(struct aead_request *req)
+{
+ u8 one_entry_in_sg = 0;
+ u8 *src, *dst, *assoc;
+ __be32 counter = cpu_to_be32(1);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ u8 iv_tab[16+AESNI_ALIGN];
+ u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN);
+ struct scatter_walk src_sg_walk;
+ struct scatter_walk assoc_sg_walk;
+ struct scatter_walk dst_sg_walk;
+ unsigned int i;
+
+ /* Assuming we are supporting rfc4106 64-bit extended */
+ /* sequence numbers We need to have the AAD length equal */
+ /* to 8 or 12 bytes */
+ if (unlikely(req->assoclen != 8 && req->assoclen != 12))
+ return -EINVAL;
+ /* IV below built */
+ for (i = 0; i < 4; i++)
+ *(iv+i) = ctx->nonce[i];
+ for (i = 0; i < 8; i++)
+ *(iv+4+i) = req->iv[i];
+ *((__be32 *)(iv+12)) = counter;
+
+ if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
+ one_entry_in_sg = 1;
+ scatterwalk_start(&src_sg_walk, req->src);
+ scatterwalk_start(&assoc_sg_walk, req->assoc);
+ src = scatterwalk_map(&src_sg_walk, 0);
+ assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ dst = src;
+ if (unlikely(req->src != req->dst)) {
+ scatterwalk_start(&dst_sg_walk, req->dst);
+ dst = scatterwalk_map(&dst_sg_walk, 0);
+ }
+
+ } else {
+ /* Allocate memory for src, dst, assoc */
+ src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
+ GFP_ATOMIC);
+ if (unlikely(!src))
+ return -ENOMEM;
+ assoc = (src + req->cryptlen + auth_tag_len);
+ scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
+ scatterwalk_map_and_copy(assoc, req->assoc, 0,
+ req->assoclen, 0);
+ dst = src;
+ }
+
+ aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
+ ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
+ + ((unsigned long)req->cryptlen), auth_tag_len);
+
+ /* The authTag (aka the Integrity Check Value) needs to be written
+ * back to the packet. */
+ if (one_entry_in_sg) {
+ if (unlikely(req->src != req->dst)) {
+ scatterwalk_unmap(dst, 0);
+ scatterwalk_done(&dst_sg_walk, 0, 0);
+ }
+ scatterwalk_unmap(src, 0);
+ scatterwalk_unmap(assoc, 0);
+ scatterwalk_done(&src_sg_walk, 0, 0);
+ scatterwalk_done(&assoc_sg_walk, 0, 0);
+ } else {
+ scatterwalk_map_and_copy(dst, req->dst, 0,
+ req->cryptlen + auth_tag_len, 1);
+ kfree(src);
+ }
+ return 0;
+}
+
+static int __driver_rfc4106_decrypt(struct aead_request *req)
+{
+ u8 one_entry_in_sg = 0;
+ u8 *src, *dst, *assoc;
+ unsigned long tempCipherLen = 0;
+ __be32 counter = cpu_to_be32(1);
+ int retval = 0;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ u8 iv_and_authTag[32+AESNI_ALIGN];
+ u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN);
+ u8 *authTag = iv + 16;
+ struct scatter_walk src_sg_walk;
+ struct scatter_walk assoc_sg_walk;
+ struct scatter_walk dst_sg_walk;
+ unsigned int i;
+
+ if (unlikely((req->cryptlen < auth_tag_len) ||
+ (req->assoclen != 8 && req->assoclen != 12)))
+ return -EINVAL;
+ /* Assuming we are supporting rfc4106 64-bit extended */
+ /* sequence numbers We need to have the AAD length */
+ /* equal to 8 or 12 bytes */
+
+ tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
+ /* IV below built */
+ for (i = 0; i < 4; i++)
+ *(iv+i) = ctx->nonce[i];
+ for (i = 0; i < 8; i++)
+ *(iv+4+i) = req->iv[i];
+ *((__be32 *)(iv+12)) = counter;
+
+ if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
+ one_entry_in_sg = 1;
+ scatterwalk_start(&src_sg_walk, req->src);
+ scatterwalk_start(&assoc_sg_walk, req->assoc);
+ src = scatterwalk_map(&src_sg_walk, 0);
+ assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ dst = src;
+ if (unlikely(req->src != req->dst)) {
+ scatterwalk_start(&dst_sg_walk, req->dst);
+ dst = scatterwalk_map(&dst_sg_walk, 0);
+ }
+
+ } else {
+ /* Allocate memory for src, dst, assoc */
+ src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
+ if (!src)
+ return -ENOMEM;
+ assoc = (src + req->cryptlen + auth_tag_len);
+ scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
+ scatterwalk_map_and_copy(assoc, req->assoc, 0,
+ req->assoclen, 0);
+ dst = src;
+ }
+
+ aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv,
+ ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
+ authTag, auth_tag_len);
+
+ /* Compare generated tag with passed in tag. */
+ retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ?
+ -EBADMSG : 0;
+
+ if (one_entry_in_sg) {
+ if (unlikely(req->src != req->dst)) {
+ scatterwalk_unmap(dst, 0);
+ scatterwalk_done(&dst_sg_walk, 0, 0);
+ }
+ scatterwalk_unmap(src, 0);
+ scatterwalk_unmap(assoc, 0);
+ scatterwalk_done(&src_sg_walk, 0, 0);
+ scatterwalk_done(&assoc_sg_walk, 0, 0);
+ } else {
+ scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
+ kfree(src);
+ }
+ return retval;
+}
+
+static struct crypto_alg __rfc4106_alg = {
+ .cra_name = "__gcm-aes-aesni",
+ .cra_driver_name = "__driver-gcm-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_aead_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list),
+ .cra_u = {
+ .aead = {
+ .encrypt = __driver_rfc4106_encrypt,
+ .decrypt = __driver_rfc4106_decrypt,
+ },
+ },
+};
+#endif
+
static int __init aesni_init(void)
{
int err;
@@ -738,6 +1248,7 @@ static int __init aesni_init(void)
printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
return -ENODEV;
}
+
if ((err = crypto_register_alg(&aesni_alg)))
goto aes_err;
if ((err = crypto_register_alg(&__aesni_alg)))
@@ -746,18 +1257,24 @@ static int __init aesni_init(void)
goto blk_ecb_err;
if ((err = crypto_register_alg(&blk_cbc_alg)))
goto blk_cbc_err;
- if ((err = crypto_register_alg(&blk_ctr_alg)))
- goto blk_ctr_err;
if ((err = crypto_register_alg(&ablk_ecb_alg)))
goto ablk_ecb_err;
if ((err = crypto_register_alg(&ablk_cbc_alg)))
goto ablk_cbc_err;
+#ifdef CONFIG_X86_64
+ if ((err = crypto_register_alg(&blk_ctr_alg)))
+ goto blk_ctr_err;
if ((err = crypto_register_alg(&ablk_ctr_alg)))
goto ablk_ctr_err;
+ if ((err = crypto_register_alg(&__rfc4106_alg)))
+ goto __aead_gcm_err;
+ if ((err = crypto_register_alg(&rfc4106_alg)))
+ goto aead_gcm_err;
#ifdef HAS_CTR
if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
goto ablk_rfc3686_ctr_err;
#endif
+#endif
#ifdef HAS_LRW
if ((err = crypto_register_alg(&ablk_lrw_alg)))
goto ablk_lrw_err;
@@ -770,7 +1287,6 @@ static int __init aesni_init(void)
if ((err = crypto_register_alg(&ablk_xts_alg)))
goto ablk_xts_err;
#endif
-
return err;
#ifdef HAS_XTS
@@ -784,18 +1300,24 @@ ablk_pcbc_err:
crypto_unregister_alg(&ablk_lrw_alg);
ablk_lrw_err:
#endif
+#ifdef CONFIG_X86_64
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
ablk_rfc3686_ctr_err:
#endif
+ crypto_unregister_alg(&rfc4106_alg);
+aead_gcm_err:
+ crypto_unregister_alg(&__rfc4106_alg);
+__aead_gcm_err:
crypto_unregister_alg(&ablk_ctr_alg);
ablk_ctr_err:
+ crypto_unregister_alg(&blk_ctr_alg);
+blk_ctr_err:
+#endif
crypto_unregister_alg(&ablk_cbc_alg);
ablk_cbc_err:
crypto_unregister_alg(&ablk_ecb_alg);
ablk_ecb_err:
- crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
crypto_unregister_alg(&blk_cbc_alg);
blk_cbc_err:
crypto_unregister_alg(&blk_ecb_alg);
@@ -818,13 +1340,17 @@ static void __exit aesni_exit(void)
#ifdef HAS_LRW
crypto_unregister_alg(&ablk_lrw_alg);
#endif
+#ifdef CONFIG_X86_64
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
#endif
+ crypto_unregister_alg(&rfc4106_alg);
+ crypto_unregister_alg(&__rfc4106_alg);
crypto_unregister_alg(&ablk_ctr_alg);
+ crypto_unregister_alg(&blk_ctr_alg);
+#endif
crypto_unregister_alg(&ablk_cbc_alg);
crypto_unregister_alg(&ablk_ecb_alg);
- crypto_unregister_alg(&blk_ctr_alg);
crypto_unregister_alg(&blk_cbc_alg);
crypto_unregister_alg(&blk_ecb_alg);
crypto_unregister_alg(&__aesni_alg);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index e4bac29a32e7..4b7cb0e691cd 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -110,7 +110,6 @@ config CRYPTO_MANAGER_DISABLE_TESTS
config CRYPTO_GF128MUL
tristate "GF(2^128) multiplication functions (EXPERIMENTAL)"
- depends on EXPERIMENTAL
help
Efficient table driven implementation of multiplications in the
field GF(2^128). This is needed by some cypher modes. This
@@ -539,8 +538,9 @@ config CRYPTO_AES_X86_64
config CRYPTO_AES_NI_INTEL
tristate "AES cipher algorithms (AES-NI)"
- depends on (X86 || UML_X86) && 64BIT
- select CRYPTO_AES_X86_64
+ depends on (X86 || UML_X86)
+ select CRYPTO_AES_X86_64 if 64BIT
+ select CRYPTO_AES_586 if !64BIT
select CRYPTO_CRYPTD
select CRYPTO_ALGAPI
select CRYPTO_FPU
@@ -563,9 +563,10 @@ config CRYPTO_AES_NI_INTEL
See <http://csrc.nist.gov/encryption/aes/> for more information.
- In addition to AES cipher algorithm support, the
- acceleration for some popular block cipher mode is supported
- too, including ECB, CBC, CTR, LRW, PCBC, XTS.
+ In addition to AES cipher algorithm support, the acceleration
+ for some popular block cipher mode is supported too, including
+ ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
+ acceleration for CTR.
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
@@ -841,6 +842,27 @@ config CRYPTO_ANSI_CPRNG
ANSI X9.31 A.2.4. Note that this option must be enabled if
CRYPTO_FIPS is selected
+config CRYPTO_USER_API
+ tristate
+
+config CRYPTO_USER_API_HASH
+ tristate "User-space interface for hash algorithms"
+ depends on NET
+ select CRYPTO_HASH
+ select CRYPTO_USER_API
+ help
+ This option enables the user-spaces interface for hash
+ algorithms.
+
+config CRYPTO_USER_API_SKCIPHER
+ tristate "User-space interface for symmetric key cipher algorithms"
+ depends on NET
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_USER_API
+ help
+ This option enables the user-spaces interface for symmetric
+ key cipher algorithms.
+
source "drivers/crypto/Kconfig"
endif # if CRYPTO
diff --git a/crypto/Makefile b/crypto/Makefile
index 423b7de61f93..e9a399ca69db 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -3,32 +3,32 @@
#
obj-$(CONFIG_CRYPTO) += crypto.o
-crypto-objs := api.o cipher.o compress.o
+crypto-y := api.o cipher.o compress.o
obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
crypto_algapi-$(CONFIG_PROC_FS) += proc.o
-crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y)
+crypto_algapi-y := algapi.o scatterwalk.o $(crypto_algapi-y)
obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o
obj-$(CONFIG_CRYPTO_AEAD2) += aead.o
-crypto_blkcipher-objs := ablkcipher.o
-crypto_blkcipher-objs += blkcipher.o
+crypto_blkcipher-y := ablkcipher.o
+crypto_blkcipher-y += blkcipher.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o
obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
-crypto_hash-objs += ahash.o
-crypto_hash-objs += shash.o
+crypto_hash-y += ahash.o
+crypto_hash-y += shash.o
obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
-cryptomgr-objs := algboss.o testmgr.o
+cryptomgr-y := algboss.o testmgr.o
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
@@ -85,6 +85,9 @@ obj-$(CONFIG_CRYPTO_RNG2) += krng.o
obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
+obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
+obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
+obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
#
# generic algorithms and the async_tx api
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
new file mode 100644
index 000000000000..940d70cb5c25
--- /dev/null
+++ b/crypto/af_alg.c
@@ -0,0 +1,483 @@
+/*
+ * af_alg: User-space algorithm interface
+ *
+ * This file provides the user-space API for algorithms.
+ *
+ * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/atomic.h>
+#include <crypto/if_alg.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/rwsem.h>
+
+struct alg_type_list {
+ const struct af_alg_type *type;
+ struct list_head list;
+};
+
+static atomic_long_t alg_memory_allocated;
+
+static struct proto alg_proto = {
+ .name = "ALG",
+ .owner = THIS_MODULE,
+ .memory_allocated = &alg_memory_allocated,
+ .obj_size = sizeof(struct alg_sock),
+};
+
+static LIST_HEAD(alg_types);
+static DECLARE_RWSEM(alg_types_sem);
+
+static const struct af_alg_type *alg_get_type(const char *name)
+{
+ const struct af_alg_type *type = ERR_PTR(-ENOENT);
+ struct alg_type_list *node;
+
+ down_read(&alg_types_sem);
+ list_for_each_entry(node, &alg_types, list) {
+ if (strcmp(node->type->name, name))
+ continue;
+
+ if (try_module_get(node->type->owner))
+ type = node->type;
+ break;
+ }
+ up_read(&alg_types_sem);
+
+ return type;
+}
+
+int af_alg_register_type(const struct af_alg_type *type)
+{
+ struct alg_type_list *node;
+ int err = -EEXIST;
+
+ down_write(&alg_types_sem);
+ list_for_each_entry(node, &alg_types, list) {
+ if (!strcmp(node->type->name, type->name))
+ goto unlock;
+ }
+
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!node)
+ goto unlock;
+
+ type->ops->owner = THIS_MODULE;
+ node->type = type;
+ list_add(&node->list, &alg_types);
+ err = 0;
+
+unlock:
+ up_write(&alg_types_sem);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_register_type);
+
+int af_alg_unregister_type(const struct af_alg_type *type)
+{
+ struct alg_type_list *node;
+ int err = -ENOENT;
+
+ down_write(&alg_types_sem);
+ list_for_each_entry(node, &alg_types, list) {
+ if (strcmp(node->type->name, type->name))
+ continue;
+
+ list_del(&node->list);
+ kfree(node);
+ err = 0;
+ break;
+ }
+ up_write(&alg_types_sem);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_unregister_type);
+
+static void alg_do_release(const struct af_alg_type *type, void *private)
+{
+ if (!type)
+ return;
+
+ type->release(private);
+ module_put(type->owner);
+}
+
+int af_alg_release(struct socket *sock)
+{
+ if (sock->sk)
+ sock_put(sock->sk);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_release);
+
+static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct sockaddr_alg *sa = (void *)uaddr;
+ const struct af_alg_type *type;
+ void *private;
+
+ if (sock->state == SS_CONNECTED)
+ return -EINVAL;
+
+ if (addr_len != sizeof(*sa))
+ return -EINVAL;
+
+ sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
+ sa->salg_name[sizeof(sa->salg_name) - 1] = 0;
+
+ type = alg_get_type(sa->salg_type);
+ if (IS_ERR(type) && PTR_ERR(type) == -ENOENT) {
+ request_module("algif-%s", sa->salg_type);
+ type = alg_get_type(sa->salg_type);
+ }
+
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+
+ private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
+ if (IS_ERR(private)) {
+ module_put(type->owner);
+ return PTR_ERR(private);
+ }
+
+ lock_sock(sk);
+
+ swap(ask->type, type);
+ swap(ask->private, private);
+
+ release_sock(sk);
+
+ alg_do_release(type, private);
+
+ return 0;
+}
+
+static int alg_setkey(struct sock *sk, char __user *ukey,
+ unsigned int keylen)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ const struct af_alg_type *type = ask->type;
+ u8 *key;
+ int err;
+
+ key = sock_kmalloc(sk, keylen, GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+
+ err = -EFAULT;
+ if (copy_from_user(key, ukey, keylen))
+ goto out;
+
+ err = type->setkey(ask->private, key, keylen);
+
+out:
+ sock_kfree_s(sk, key, keylen);
+
+ return err;
+}
+
+static int alg_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ const struct af_alg_type *type;
+ int err = -ENOPROTOOPT;
+
+ lock_sock(sk);
+ type = ask->type;
+
+ if (level != SOL_ALG || !type)
+ goto unlock;
+
+ switch (optname) {
+ case ALG_SET_KEY:
+ if (sock->state == SS_CONNECTED)
+ goto unlock;
+ if (!type->setkey)
+ goto unlock;
+
+ err = alg_setkey(sk, optval, optlen);
+ }
+
+unlock:
+ release_sock(sk);
+
+ return err;
+}
+
+int af_alg_accept(struct sock *sk, struct socket *newsock)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ const struct af_alg_type *type;
+ struct sock *sk2;
+ int err;
+
+ lock_sock(sk);
+ type = ask->type;
+
+ err = -EINVAL;
+ if (!type)
+ goto unlock;
+
+ sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto);
+ err = -ENOMEM;
+ if (!sk2)
+ goto unlock;
+
+ sock_init_data(newsock, sk2);
+ sock_graft(sk2, newsock);
+
+ err = type->accept(ask->private, sk2);
+ if (err) {
+ sk_free(sk2);
+ goto unlock;
+ }
+
+ sk2->sk_family = PF_ALG;
+
+ sock_hold(sk);
+ alg_sk(sk2)->parent = sk;
+ alg_sk(sk2)->type = type;
+
+ newsock->ops = type->ops;
+ newsock->state = SS_CONNECTED;
+
+ err = 0;
+
+unlock:
+ release_sock(sk);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_accept);
+
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+ return af_alg_accept(sock->sk, newsock);
+}
+
+static const struct proto_ops alg_proto_ops = {
+ .family = PF_ALG,
+ .owner = THIS_MODULE,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .poll = sock_no_poll,
+
+ .bind = alg_bind,
+ .release = af_alg_release,
+ .setsockopt = alg_setsockopt,
+ .accept = alg_accept,
+};
+
+static void alg_sock_destruct(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+
+ alg_do_release(ask->type, ask->private);
+}
+
+static int alg_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct sock *sk;
+ int err;
+
+ if (sock->type != SOCK_SEQPACKET)
+ return -ESOCKTNOSUPPORT;
+ if (protocol != 0)
+ return -EPROTONOSUPPORT;
+
+ err = -ENOMEM;
+ sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto);
+ if (!sk)
+ goto out;
+
+ sock->ops = &alg_proto_ops;
+ sock_init_data(sock, sk);
+
+ sk->sk_family = PF_ALG;
+ sk->sk_destruct = alg_sock_destruct;
+
+ return 0;
+out:
+ return err;
+}
+
+static const struct net_proto_family alg_family = {
+ .family = PF_ALG,
+ .create = alg_create,
+ .owner = THIS_MODULE,
+};
+
+int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len,
+ int write)
+{
+ unsigned long from = (unsigned long)addr;
+ unsigned long npages;
+ unsigned off;
+ int err;
+ int i;
+
+ err = -EFAULT;
+ if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len))
+ goto out;
+
+ off = from & ~PAGE_MASK;
+ npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (npages > ALG_MAX_PAGES)
+ npages = ALG_MAX_PAGES;
+
+ err = get_user_pages_fast(from, npages, write, sgl->pages);
+ if (err < 0)
+ goto out;
+
+ npages = err;
+ err = -EINVAL;
+ if (WARN_ON(npages == 0))
+ goto out;
+
+ err = 0;
+
+ sg_init_table(sgl->sg, npages);
+
+ for (i = 0; i < npages; i++) {
+ int plen = min_t(int, len, PAGE_SIZE - off);
+
+ sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
+
+ off = 0;
+ len -= plen;
+ err += plen;
+ }
+
+out:
+ return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_make_sg);
+
+void af_alg_free_sg(struct af_alg_sgl *sgl)
+{
+ int i;
+
+ i = 0;
+ do {
+ put_page(sgl->pages[i]);
+ } while (!sg_is_last(sgl->sg + (i++)));
+}
+EXPORT_SYMBOL_GPL(af_alg_free_sg);
+
+int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
+{
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+ if (cmsg->cmsg_level != SOL_ALG)
+ continue;
+
+ switch(cmsg->cmsg_type) {
+ case ALG_SET_IV:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(*con->iv)))
+ return -EINVAL;
+ con->iv = (void *)CMSG_DATA(cmsg);
+ if (cmsg->cmsg_len < CMSG_LEN(con->iv->ivlen +
+ sizeof(*con->iv)))
+ return -EINVAL;
+ break;
+
+ case ALG_SET_OP:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ con->op = *(u32 *)CMSG_DATA(cmsg);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_cmsg_send);
+
+int af_alg_wait_for_completion(int err, struct af_alg_completion *completion)
+{
+ switch (err) {
+ case -EINPROGRESS:
+ case -EBUSY:
+ wait_for_completion(&completion->completion);
+ INIT_COMPLETION(completion->completion);
+ err = completion->err;
+ break;
+ };
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_completion);
+
+void af_alg_complete(struct crypto_async_request *req, int err)
+{
+ struct af_alg_completion *completion = req->data;
+
+ completion->err = err;
+ complete(&completion->completion);
+}
+EXPORT_SYMBOL_GPL(af_alg_complete);
+
+static int __init af_alg_init(void)
+{
+ int err = proto_register(&alg_proto, 0);
+
+ if (err)
+ goto out;
+
+ err = sock_register(&alg_family);
+ if (err != 0)
+ goto out_unregister_proto;
+
+out:
+ return err;
+
+out_unregister_proto:
+ proto_unregister(&alg_proto);
+ goto out;
+}
+
+static void __exit af_alg_exit(void)
+{
+ sock_unregister(PF_ALG);
+ proto_unregister(&alg_proto);
+}
+
+module_init(af_alg_init);
+module_exit(af_alg_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(AF_ALG);
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
new file mode 100644
index 000000000000..62122a1a2f7a
--- /dev/null
+++ b/crypto/algif_hash.c
@@ -0,0 +1,319 @@
+/*
+ * algif_hash: User-space interface for hash algorithms
+ *
+ * This file provides the user-space API for hash algorithms.
+ *
+ * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/hash.h>
+#include <crypto/if_alg.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+
+struct hash_ctx {
+ struct af_alg_sgl sgl;
+
+ u8 *result;
+
+ struct af_alg_completion completion;
+
+ unsigned int len;
+ bool more;
+
+ struct ahash_request req;
+};
+
+static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t ignored)
+{
+ int limit = ALG_MAX_PAGES * PAGE_SIZE;
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct hash_ctx *ctx = ask->private;
+ unsigned long iovlen;
+ struct iovec *iov;
+ long copied = 0;
+ int err;
+
+ if (limit > sk->sk_sndbuf)
+ limit = sk->sk_sndbuf;
+
+ lock_sock(sk);
+ if (!ctx->more) {
+ err = crypto_ahash_init(&ctx->req);
+ if (err)
+ goto unlock;
+ }
+
+ ctx->more = 0;
+
+ for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
+ iovlen--, iov++) {
+ unsigned long seglen = iov->iov_len;
+ char __user *from = iov->iov_base;
+
+ while (seglen) {
+ int len = min_t(unsigned long, seglen, limit);
+ int newlen;
+
+ newlen = af_alg_make_sg(&ctx->sgl, from, len, 0);
+ if (newlen < 0)
+ goto unlock;
+
+ ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL,
+ newlen);
+
+ err = af_alg_wait_for_completion(
+ crypto_ahash_update(&ctx->req),
+ &ctx->completion);
+
+ af_alg_free_sg(&ctx->sgl);
+
+ if (err)
+ goto unlock;
+
+ seglen -= newlen;
+ from += newlen;
+ copied += newlen;
+ }
+ }
+
+ err = 0;
+
+ ctx->more = msg->msg_flags & MSG_MORE;
+ if (!ctx->more) {
+ ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
+ err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
+ &ctx->completion);
+ }
+
+unlock:
+ release_sock(sk);
+
+ return err ?: copied;
+}
+
+static ssize_t hash_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct hash_ctx *ctx = ask->private;
+ int err;
+
+ lock_sock(sk);
+ sg_init_table(ctx->sgl.sg, 1);
+ sg_set_page(ctx->sgl.sg, page, size, offset);
+
+ ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
+
+ if (!(flags & MSG_MORE)) {
+ if (ctx->more)
+ err = crypto_ahash_finup(&ctx->req);
+ else
+ err = crypto_ahash_digest(&ctx->req);
+ } else {
+ if (!ctx->more) {
+ err = crypto_ahash_init(&ctx->req);
+ if (err)
+ goto unlock;
+ }
+
+ err = crypto_ahash_update(&ctx->req);
+ }
+
+ err = af_alg_wait_for_completion(err, &ctx->completion);
+ if (err)
+ goto unlock;
+
+ ctx->more = flags & MSG_MORE;
+
+unlock:
+ release_sock(sk);
+
+ return err ?: size;
+}
+
+static int hash_recvmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct hash_ctx *ctx = ask->private;
+ unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+ int err;
+
+ if (len > ds)
+ len = ds;
+ else if (len < ds)
+ msg->msg_flags |= MSG_TRUNC;
+
+ lock_sock(sk);
+ if (ctx->more) {
+ ctx->more = 0;
+ ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
+ err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
+ &ctx->completion);
+ if (err)
+ goto unlock;
+ }
+
+ err = memcpy_toiovec(msg->msg_iov, ctx->result, len);
+
+unlock:
+ release_sock(sk);
+
+ return err ?: len;
+}
+
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct hash_ctx *ctx = ask->private;
+ struct ahash_request *req = &ctx->req;
+ char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))];
+ struct sock *sk2;
+ struct alg_sock *ask2;
+ struct hash_ctx *ctx2;
+ int err;
+
+ err = crypto_ahash_export(req, state);
+ if (err)
+ return err;
+
+ err = af_alg_accept(ask->parent, newsock);
+ if (err)
+ return err;
+
+ sk2 = newsock->sk;
+ ask2 = alg_sk(sk2);
+ ctx2 = ask2->private;
+ ctx2->more = 1;
+
+ err = crypto_ahash_import(&ctx2->req, state);
+ if (err) {
+ sock_orphan(sk2);
+ sock_put(sk2);
+ }
+
+ return err;
+}
+
+static struct proto_ops algif_hash_ops = {
+ .family = PF_ALG,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .bind = sock_no_bind,
+ .setsockopt = sock_no_setsockopt,
+ .poll = sock_no_poll,
+
+ .release = af_alg_release,
+ .sendmsg = hash_sendmsg,
+ .sendpage = hash_sendpage,
+ .recvmsg = hash_recvmsg,
+ .accept = hash_accept,
+};
+
+static void *hash_bind(const char *name, u32 type, u32 mask)
+{
+ return crypto_alloc_ahash(name, type, mask);
+}
+
+static void hash_release(void *private)
+{
+ crypto_free_ahash(private);
+}
+
+static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
+{
+ return crypto_ahash_setkey(private, key, keylen);
+}
+
+static void hash_sock_destruct(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct hash_ctx *ctx = ask->private;
+
+ sock_kfree_s(sk, ctx->result,
+ crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)));
+ sock_kfree_s(sk, ctx, ctx->len);
+ af_alg_release_parent(sk);
+}
+
+static int hash_accept_parent(void *private, struct sock *sk)
+{
+ struct hash_ctx *ctx;
+ struct alg_sock *ask = alg_sk(sk);
+ unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
+ unsigned ds = crypto_ahash_digestsize(private);
+
+ ctx = sock_kmalloc(sk, len, GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
+ if (!ctx->result) {
+ sock_kfree_s(sk, ctx, len);
+ return -ENOMEM;
+ }
+
+ memset(ctx->result, 0, ds);
+
+ ctx->len = len;
+ ctx->more = 0;
+ af_alg_init_completion(&ctx->completion);
+
+ ask->private = ctx;
+
+ ahash_request_set_tfm(&ctx->req, private);
+ ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ af_alg_complete, &ctx->completion);
+
+ sk->sk_destruct = hash_sock_destruct;
+
+ return 0;
+}
+
+static const struct af_alg_type algif_type_hash = {
+ .bind = hash_bind,
+ .release = hash_release,
+ .setkey = hash_setkey,
+ .accept = hash_accept_parent,
+ .ops = &algif_hash_ops,
+ .name = "hash",
+ .owner = THIS_MODULE
+};
+
+static int __init algif_hash_init(void)
+{
+ return af_alg_register_type(&algif_type_hash);
+}
+
+static void __exit algif_hash_exit(void)
+{
+ int err = af_alg_unregister_type(&algif_type_hash);
+ BUG_ON(err);
+}
+
+module_init(algif_hash_init);
+module_exit(algif_hash_exit);
+MODULE_LICENSE("GPL");
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
new file mode 100644
index 000000000000..6a6dfc062d2a
--- /dev/null
+++ b/crypto/algif_skcipher.c
@@ -0,0 +1,632 @@
+/*
+ * algif_skcipher: User-space interface for skcipher algorithms
+ *
+ * This file provides the user-space API for symmetric key ciphers.
+ *
+ * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
+#include <crypto/if_alg.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+
+struct skcipher_sg_list {
+ struct list_head list;
+
+ int cur;
+
+ struct scatterlist sg[0];
+};
+
+struct skcipher_ctx {
+ struct list_head tsgl;
+ struct af_alg_sgl rsgl;
+
+ void *iv;
+
+ struct af_alg_completion completion;
+
+ unsigned used;
+
+ unsigned int len;
+ bool more;
+ bool merge;
+ bool enc;
+
+ struct ablkcipher_request req;
+};
+
+#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \
+ sizeof(struct scatterlist) - 1)
+
+static inline int skcipher_sndbuf(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+
+ return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
+ ctx->used, 0);
+}
+
+static inline bool skcipher_writable(struct sock *sk)
+{
+ return PAGE_SIZE <= skcipher_sndbuf(sk);
+}
+
+static int skcipher_alloc_sgl(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct skcipher_sg_list *sgl;
+ struct scatterlist *sg = NULL;
+
+ sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+ if (!list_empty(&ctx->tsgl))
+ sg = sgl->sg;
+
+ if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+ sgl = sock_kmalloc(sk, sizeof(*sgl) +
+ sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+ GFP_KERNEL);
+ if (!sgl)
+ return -ENOMEM;
+
+ sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+ sgl->cur = 0;
+
+ if (sg)
+ scatterwalk_sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+ list_add_tail(&sgl->list, &ctx->tsgl);
+ }
+
+ return 0;
+}
+
+static void skcipher_pull_sgl(struct sock *sk, int used)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct skcipher_sg_list *sgl;
+ struct scatterlist *sg;
+ int i;
+
+ while (!list_empty(&ctx->tsgl)) {
+ sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
+ list);
+ sg = sgl->sg;
+
+ for (i = 0; i < sgl->cur; i++) {
+ int plen = min_t(int, used, sg[i].length);
+
+ if (!sg_page(sg + i))
+ continue;
+
+ sg[i].length -= plen;
+ sg[i].offset += plen;
+
+ used -= plen;
+ ctx->used -= plen;
+
+ if (sg[i].length)
+ return;
+
+ put_page(sg_page(sg + i));
+ sg_assign_page(sg + i, NULL);
+ }
+
+ list_del(&sgl->list);
+ sock_kfree_s(sk, sgl,
+ sizeof(*sgl) + sizeof(sgl->sg[0]) *
+ (MAX_SGL_ENTS + 1));
+ }
+
+ if (!ctx->used)
+ ctx->merge = 0;
+}
+
+static void skcipher_free_sgl(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+
+ skcipher_pull_sgl(sk, ctx->used);
+}
+
+static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
+{
+ long timeout;
+ DEFINE_WAIT(wait);
+ int err = -ERESTARTSYS;
+
+ if (flags & MSG_DONTWAIT)
+ return -EAGAIN;
+
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+ for (;;) {
+ if (signal_pending(current))
+ break;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) {
+ err = 0;
+ break;
+ }
+ }
+ finish_wait(sk_sleep(sk), &wait);
+
+ return err;
+}
+
+static void skcipher_wmem_wakeup(struct sock *sk)
+{
+ struct socket_wq *wq;
+
+ if (!skcipher_writable(sk))
+ return;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+ POLLRDNORM |
+ POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ rcu_read_unlock();
+}
+
+static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ long timeout;
+ DEFINE_WAIT(wait);
+ int err = -ERESTARTSYS;
+
+ if (flags & MSG_DONTWAIT) {
+ return -EAGAIN;
+ }
+
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+ for (;;) {
+ if (signal_pending(current))
+ break;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (sk_wait_event(sk, &timeout, ctx->used)) {
+ err = 0;
+ break;
+ }
+ }
+ finish_wait(sk_sleep(sk), &wait);
+
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+ return err;
+}
+
+static void skcipher_data_wakeup(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct socket_wq *wq;
+
+ if (!ctx->used)
+ return;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+ POLLRDNORM |
+ POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ rcu_read_unlock();
+}
+
+static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t size)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
+ unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
+ struct skcipher_sg_list *sgl;
+ struct af_alg_control con = {};
+ long copied = 0;
+ bool enc = 0;
+ int err;
+ int i;
+
+ if (msg->msg_controllen) {
+ err = af_alg_cmsg_send(msg, &con);
+ if (err)
+ return err;
+
+ switch (con.op) {
+ case ALG_OP_ENCRYPT:
+ enc = 1;
+ break;
+ case ALG_OP_DECRYPT:
+ enc = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (con.iv && con.iv->ivlen != ivsize)
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+
+ lock_sock(sk);
+ if (!ctx->more && ctx->used)
+ goto unlock;
+
+ if (!ctx->used) {
+ ctx->enc = enc;
+ if (con.iv)
+ memcpy(ctx->iv, con.iv->iv, ivsize);
+ }
+
+ while (size) {
+ struct scatterlist *sg;
+ unsigned long len = size;
+ int plen;
+
+ if (ctx->merge) {
+ sgl = list_entry(ctx->tsgl.prev,
+ struct skcipher_sg_list, list);
+ sg = sgl->sg + sgl->cur - 1;
+ len = min_t(unsigned long, len,
+ PAGE_SIZE - sg->offset - sg->length);
+
+ err = memcpy_fromiovec(page_address(sg_page(sg)) +
+ sg->offset + sg->length,
+ msg->msg_iov, len);
+ if (err)
+ goto unlock;
+
+ sg->length += len;
+ ctx->merge = (sg->offset + sg->length) &
+ (PAGE_SIZE - 1);
+
+ ctx->used += len;
+ copied += len;
+ size -= len;
+ continue;
+ }
+
+ if (!skcipher_writable(sk)) {
+ err = skcipher_wait_for_wmem(sk, msg->msg_flags);
+ if (err)
+ goto unlock;
+ }
+
+ len = min_t(unsigned long, len, skcipher_sndbuf(sk));
+
+ err = skcipher_alloc_sgl(sk);
+ if (err)
+ goto unlock;
+
+ sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+ sg = sgl->sg;
+ do {
+ i = sgl->cur;
+ plen = min_t(int, len, PAGE_SIZE);
+
+ sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+ err = -ENOMEM;
+ if (!sg_page(sg + i))
+ goto unlock;
+
+ err = memcpy_fromiovec(page_address(sg_page(sg + i)),
+ msg->msg_iov, plen);
+ if (err) {
+ __free_page(sg_page(sg + i));
+ sg_assign_page(sg + i, NULL);
+ goto unlock;
+ }
+
+ sg[i].length = plen;
+ len -= plen;
+ ctx->used += plen;
+ copied += plen;
+ size -= plen;
+ sgl->cur++;
+ } while (len && sgl->cur < MAX_SGL_ENTS);
+
+ ctx->merge = plen & (PAGE_SIZE - 1);
+ }
+
+ err = 0;
+
+ ctx->more = msg->msg_flags & MSG_MORE;
+ if (!ctx->more && !list_empty(&ctx->tsgl))
+ sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+
+unlock:
+ skcipher_data_wakeup(sk);
+ release_sock(sk);
+
+ return copied ?: err;
+}
+
+static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct skcipher_sg_list *sgl;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+ if (!ctx->more && ctx->used)
+ goto unlock;
+
+ if (!size)
+ goto done;
+
+ if (!skcipher_writable(sk)) {
+ err = skcipher_wait_for_wmem(sk, flags);
+ if (err)
+ goto unlock;
+ }
+
+ err = skcipher_alloc_sgl(sk);
+ if (err)
+ goto unlock;
+
+ ctx->merge = 0;
+ sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+
+ get_page(page);
+ sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+ sgl->cur++;
+ ctx->used += size;
+
+done:
+ ctx->more = flags & MSG_MORE;
+ if (!ctx->more && !list_empty(&ctx->tsgl))
+ sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
+
+unlock:
+ skcipher_data_wakeup(sk);
+ release_sock(sk);
+
+ return err ?: size;
+}
+
+static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t ignored, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm(
+ &ctx->req));
+ struct skcipher_sg_list *sgl;
+ struct scatterlist *sg;
+ unsigned long iovlen;
+ struct iovec *iov;
+ int err = -EAGAIN;
+ int used;
+ long copied = 0;
+
+ lock_sock(sk);
+ for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
+ iovlen--, iov++) {
+ unsigned long seglen = iov->iov_len;
+ char __user *from = iov->iov_base;
+
+ while (seglen) {
+ sgl = list_first_entry(&ctx->tsgl,
+ struct skcipher_sg_list, list);
+ sg = sgl->sg;
+
+ while (!sg->length)
+ sg++;
+
+ used = ctx->used;
+ if (!used) {
+ err = skcipher_wait_for_data(sk, flags);
+ if (err)
+ goto unlock;
+ }
+
+ used = min_t(unsigned long, used, seglen);
+
+ used = af_alg_make_sg(&ctx->rsgl, from, used, 1);
+ err = used;
+ if (err < 0)
+ goto unlock;
+
+ if (ctx->more || used < ctx->used)
+ used -= used % bs;
+
+ err = -EINVAL;
+ if (!used)
+ goto free;
+
+ ablkcipher_request_set_crypt(&ctx->req, sg,
+ ctx->rsgl.sg, used,
+ ctx->iv);
+
+ err = af_alg_wait_for_completion(
+ ctx->enc ?
+ crypto_ablkcipher_encrypt(&ctx->req) :
+ crypto_ablkcipher_decrypt(&ctx->req),
+ &ctx->completion);
+
+free:
+ af_alg_free_sg(&ctx->rsgl);
+
+ if (err)
+ goto unlock;
+
+ copied += used;
+ from += used;
+ seglen -= used;
+ skcipher_pull_sgl(sk, used);
+ }
+ }
+
+ err = 0;
+
+unlock:
+ skcipher_wmem_wakeup(sk);
+ release_sock(sk);
+
+ return copied ?: err;
+}
+
+
+static unsigned int skcipher_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ unsigned int mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
+
+ if (ctx->used)
+ mask |= POLLIN | POLLRDNORM;
+
+ if (skcipher_writable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+ return mask;
+}
+
+static struct proto_ops algif_skcipher_ops = {
+ .family = PF_ALG,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .bind = sock_no_bind,
+ .accept = sock_no_accept,
+ .setsockopt = sock_no_setsockopt,
+
+ .release = af_alg_release,
+ .sendmsg = skcipher_sendmsg,
+ .sendpage = skcipher_sendpage,
+ .recvmsg = skcipher_recvmsg,
+ .poll = skcipher_poll,
+};
+
+static void *skcipher_bind(const char *name, u32 type, u32 mask)
+{
+ return crypto_alloc_ablkcipher(name, type, mask);
+}
+
+static void skcipher_release(void *private)
+{
+ crypto_free_ablkcipher(private);
+}
+
+static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
+{
+ return crypto_ablkcipher_setkey(private, key, keylen);
+}
+
+static void skcipher_sock_destruct(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct skcipher_ctx *ctx = ask->private;
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
+
+ skcipher_free_sgl(sk);
+ sock_kfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
+ sock_kfree_s(sk, ctx, ctx->len);
+ af_alg_release_parent(sk);
+}
+
+static int skcipher_accept_parent(void *private, struct sock *sk)
+{
+ struct skcipher_ctx *ctx;
+ struct alg_sock *ask = alg_sk(sk);
+ unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private);
+
+ ctx = sock_kmalloc(sk, len, GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private),
+ GFP_KERNEL);
+ if (!ctx->iv) {
+ sock_kfree_s(sk, ctx, len);
+ return -ENOMEM;
+ }
+
+ memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private));
+
+ INIT_LIST_HEAD(&ctx->tsgl);
+ ctx->len = len;
+ ctx->used = 0;
+ ctx->more = 0;
+ ctx->merge = 0;
+ ctx->enc = 0;
+ af_alg_init_completion(&ctx->completion);
+
+ ask->private = ctx;
+
+ ablkcipher_request_set_tfm(&ctx->req, private);
+ ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ af_alg_complete, &ctx->completion);
+
+ sk->sk_destruct = skcipher_sock_destruct;
+
+ return 0;
+}
+
+static const struct af_alg_type algif_type_skcipher = {
+ .bind = skcipher_bind,
+ .release = skcipher_release,
+ .setkey = skcipher_setkey,
+ .accept = skcipher_accept_parent,
+ .ops = &algif_skcipher_ops,
+ .name = "skcipher",
+ .owner = THIS_MODULE
+};
+
+static int __init algif_skcipher_init(void)
+{
+ return af_alg_register_type(&algif_type_skcipher);
+}
+
+static void __exit algif_skcipher_exit(void)
+{
+ int err = af_alg_unregister_type(&algif_type_skcipher);
+ BUG_ON(err);
+}
+
+module_init(algif_skcipher_init);
+module_exit(algif_skcipher_exit);
+MODULE_LICENSE("GPL");
diff --git a/crypto/authenc.c b/crypto/authenc.c
index a5a22cfcd07b..5ef7ba6b6a76 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -107,20 +107,6 @@ badkey:
goto out;
}
-static void authenc_chain(struct scatterlist *head, struct scatterlist *sg,
- int chain)
-{
- if (chain) {
- head->length += sg->length;
- sg = scatterwalk_sg_next(sg);
- }
-
- if (sg)
- scatterwalk_sg_chain(head, 2, sg);
- else
- sg_mark_end(head);
-}
-
static void authenc_geniv_ahash_update_done(struct crypto_async_request *areq,
int err)
{
@@ -345,7 +331,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv,
if (ivsize) {
sg_init_table(cipher, 2);
sg_set_buf(cipher, iv, ivsize);
- authenc_chain(cipher, dst, vdst == iv + ivsize);
+ scatterwalk_crypto_chain(cipher, dst, vdst == iv + ivsize, 2);
dst = cipher;
cryptlen += ivsize;
}
@@ -354,7 +340,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv,
authenc_ahash_fn = crypto_authenc_ahash;
sg_init_table(asg, 2);
sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset);
- authenc_chain(asg, dst, 0);
+ scatterwalk_crypto_chain(asg, dst, 0, 2);
dst = asg;
cryptlen += req->assoclen;
}
@@ -499,7 +485,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv,
if (ivsize) {
sg_init_table(cipher, 2);
sg_set_buf(cipher, iv, ivsize);
- authenc_chain(cipher, src, vsrc == iv + ivsize);
+ scatterwalk_crypto_chain(cipher, src, vsrc == iv + ivsize, 2);
src = cipher;
cryptlen += ivsize;
}
@@ -508,7 +494,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv,
authenc_ahash_fn = crypto_authenc_ahash;
sg_init_table(asg, 2);
sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset);
- authenc_chain(asg, src, 0);
+ scatterwalk_crypto_chain(asg, src, 0, 2);
src = asg;
cryptlen += req->assoclen;
}
diff --git a/crypto/cast5.c b/crypto/cast5.c
index a1d2294b50ad..4a230ddec877 100644
--- a/crypto/cast5.c
+++ b/crypto/cast5.c
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
* Rounds 3, 6, 9, 12, and 15 use f function Type 3.
*/
+ t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
+ t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
+ t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
+ t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
+ t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
+ t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
+ t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
+ t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
+ t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
+ t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
+ t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
+ t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
if (!(c->rr)) {
- t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
- t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
- t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
- t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
- t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
- t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
- t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
- t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
- t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
- t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
- t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
- t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
- } else {
- t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
- t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
- t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
- t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
- t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
- t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
- t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
- t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
- t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
- t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
- t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
- t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
}
/* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
- t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
- t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
- t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
- t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
- t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
- t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
- t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
- t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
- t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
- t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
- t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
- t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
- } else {
- t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
- t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
- t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
- t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
- t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
- t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
- t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
- t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
- t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
- t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
- t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
- t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
}
+ t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
+ t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
+ t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
+ t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
+ t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
+ t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
+ t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
+ t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
+ t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
+ t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
+ t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
+ t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
dst[0] = cpu_to_be32(r);
dst[1] = cpu_to_be32(l);
diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c
index fdcf6248f152..b980ee1af459 100644
--- a/crypto/crypto_wq.c
+++ b/crypto/crypto_wq.c
@@ -20,7 +20,8 @@ EXPORT_SYMBOL_GPL(kcrypto_wq);
static int __init crypto_wq_init(void)
{
- kcrypto_wq = create_workqueue("crypto");
+ kcrypto_wq = alloc_workqueue("crypto",
+ WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
if (unlikely(!kcrypto_wq))
return -ENOMEM;
return 0;
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 463dc859aa05..cbc7a33a9600 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -48,12 +48,11 @@ static int deflate_comp_init(struct deflate_ctx *ctx)
int ret = 0;
struct z_stream_s *stream = &ctx->comp_stream;
- stream->workspace = vmalloc(zlib_deflate_workspacesize());
+ stream->workspace = vzalloc(zlib_deflate_workspacesize());
if (!stream->workspace) {
ret = -ENOMEM;
goto out;
}
- memset(stream->workspace, 0, zlib_deflate_workspacesize());
ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
-DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c
index 3ca3b669d5d5..42ce9f570aec 100644
--- a/crypto/eseqiv.c
+++ b/crypto/eseqiv.c
@@ -62,20 +62,6 @@ out:
skcipher_givcrypt_complete(req, err);
}
-static void eseqiv_chain(struct scatterlist *head, struct scatterlist *sg,
- int chain)
-{
- if (chain) {
- head->length += sg->length;
- sg = scatterwalk_sg_next(sg);
- }
-
- if (sg)
- scatterwalk_sg_chain(head, 2, sg);
- else
- sg_mark_end(head);
-}
-
static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req)
{
struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
@@ -124,13 +110,13 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req)
sg_init_table(reqctx->src, 2);
sg_set_buf(reqctx->src, giv, ivsize);
- eseqiv_chain(reqctx->src, osrc, vsrc == giv + ivsize);
+ scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2);
dst = reqctx->src;
if (osrc != odst) {
sg_init_table(reqctx->dst, 2);
sg_set_buf(reqctx->dst, giv, ivsize);
- eseqiv_chain(reqctx->dst, odst, vdst == giv + ivsize);
+ scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2);
dst = reqctx->dst;
}
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 2f5fbba6576c..1a252639ef91 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -1102,21 +1102,6 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
return crypto_aead_setauthsize(ctx->child, authsize);
}
-/* this is the same as crypto_authenc_chain */
-static void crypto_rfc4543_chain(struct scatterlist *head,
- struct scatterlist *sg, int chain)
-{
- if (chain) {
- head->length += sg->length;
- sg = scatterwalk_sg_next(sg);
- }
-
- if (sg)
- scatterwalk_sg_chain(head, 2, sg);
- else
- sg_mark_end(head);
-}
-
static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
int enc)
{
@@ -1154,13 +1139,13 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
sg_init_table(payload, 2);
sg_set_buf(payload, req->iv, 8);
- crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8);
+ scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2);
assoclen += 8 + req->cryptlen - (enc ? 0 : authsize);
sg_init_table(assoc, 2);
sg_set_page(assoc, sg_page(req->assoc), req->assoc->length,
req->assoc->offset);
- crypto_rfc4543_chain(assoc, payload, 0);
+ scatterwalk_crypto_chain(assoc, payload, 0, 2);
aead_request_set_tfm(subreq, ctx->child);
aead_request_set_callback(subreq, req->base.flags, req->base.complete,
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 75586f1f86e7..29a89dad68b6 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -455,7 +455,8 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt,
get_online_cpus();
- pcrypt->wq = create_workqueue(name);
+ pcrypt->wq = alloc_workqueue(name,
+ WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
if (!pcrypt->wq)
goto err;
diff --git a/crypto/rmd128.c b/crypto/rmd128.c
index 1ceb6735aa53..8a0f68b7f257 100644
--- a/crypto/rmd128.c
+++ b/crypto/rmd128.c
@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
- * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
+ * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -325,4 +325,5 @@ module_init(rmd128_mod_init);
module_exit(rmd128_mod_fini);
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-128 Message Digest");
diff --git a/crypto/rmd160.c b/crypto/rmd160.c
index 472261fc913f..525d7bb752cf 100644
--- a/crypto/rmd160.c
+++ b/crypto/rmd160.c
@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
- * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
+ * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -369,4 +369,5 @@ module_init(rmd160_mod_init);
module_exit(rmd160_mod_fini);
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-160 Message Digest");
diff --git a/crypto/rmd256.c b/crypto/rmd256.c
index 72eafa8d2e7b..69293d9b56e0 100644
--- a/crypto/rmd256.c
+++ b/crypto/rmd256.c
@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
- * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
+ * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -344,4 +344,5 @@ module_init(rmd256_mod_init);
module_exit(rmd256_mod_fini);
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-256 Message Digest");
diff --git a/crypto/rmd320.c b/crypto/rmd320.c
index 86becaba2f05..09f97dfdfbba 100644
--- a/crypto/rmd320.c
+++ b/crypto/rmd320.c
@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
- * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
+ * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -393,4 +393,5 @@ module_init(rmd320_mod_init);
module_exit(rmd320_mod_fini);
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-320 Message Digest");
diff --git a/crypto/shash.c b/crypto/shash.c
index 22fd9433141f..76f74b963151 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -310,7 +310,13 @@ static int shash_async_export(struct ahash_request *req, void *out)
static int shash_async_import(struct ahash_request *req, const void *in)
{
- return crypto_shash_import(ahash_request_ctx(req), in);
+ struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+ struct shash_desc *desc = ahash_request_ctx(req);
+
+ desc->tfm = *ctx;
+ desc->flags = req->base.flags;
+
+ return crypto_shash_import(desc, in);
}
static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 3ca68f9fc14d..9aac5e58be94 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -8,6 +8,13 @@
* Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) 2007 Nokia Siemens Networks
*
+ * Updated RFC4106 AES-GCM testing.
+ * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
@@ -980,6 +987,10 @@ static int do_test(int m)
ret += tcrypt_test("ansi_cprng");
break;
+ case 151:
+ ret += tcrypt_test("rfc4106(gcm(aes))");
+ break;
+
case 200:
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index fa8c8f78c8d4..27ea9fe9476f 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -6,6 +6,13 @@
* Copyright (c) 2007 Nokia Siemens Networks
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
+ * Updated RFC4106 AES-GCM testing.
+ * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
@@ -2242,6 +2249,23 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "rfc4106(gcm(aes))",
+ .test = alg_test_aead,
+ .suite = {
+ .aead = {
+ .enc = {
+ .vecs = aes_gcm_rfc4106_enc_tv_template,
+ .count = AES_GCM_4106_ENC_TEST_VECTORS
+ },
+ .dec = {
+ .vecs = aes_gcm_rfc4106_dec_tv_template,
+ .count = AES_GCM_4106_DEC_TEST_VECTORS
+ }
+ }
+ }
+ }, {
+
+
.alg = "rfc4309(ccm(aes))",
.test = alg_test_aead,
.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 74e35377fd30..834af7f2adee 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -6,6 +6,15 @@
* Copyright (c) 2007 Nokia Siemens Networks
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
+ * Updated RFC4106 AES-GCM testing. Some test vectors were taken from
+ * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/
+ * gcm/gcm-test-vectors.tar.gz
+ * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
+ * Adrian Hoban <adrian.hoban@intel.com>
+ * Gabriele Paoloni <gabriele.paoloni@intel.com>
+ * Tadeusz Struk (tadeusz.struk@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
@@ -2947,6 +2956,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
#define AES_CTR_3686_DEC_TEST_VECTORS 6
#define AES_GCM_ENC_TEST_VECTORS 9
#define AES_GCM_DEC_TEST_VECTORS 8
+#define AES_GCM_4106_ENC_TEST_VECTORS 7
+#define AES_GCM_4106_DEC_TEST_VECTORS 7
#define AES_CCM_ENC_TEST_VECTORS 7
#define AES_CCM_DEC_TEST_VECTORS 7
#define AES_CCM_4309_ENC_TEST_VECTORS 7
@@ -5829,6 +5840,356 @@ static struct aead_testvec aes_gcm_dec_tv_template[] = {
}
};
+static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
+ { /* Generated using Crypto++ */
+ .key = zeroed_string,
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = zeroed_string,
+ .ilen = 16,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
+ "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
+ "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
+ "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
+ .rlen = 32,
+ },{
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = zeroed_string,
+ .ilen = 16,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
+ "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
+ "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
+ "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
+ .rlen = 32,
+
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .ilen = 16,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
+ "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+ "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
+ "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
+ .rlen = 32,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .ilen = 16,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
+ "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+ "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
+ "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
+ .rlen = 32,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .ilen = 16,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
+ "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+ "\x64\x50\xF9\x32\x13\xFB\x74\x61"
+ "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
+ .rlen = 32,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .ilen = 64,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
+ "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+ "\x98\x14\xA1\x42\x37\x80\xFD\x90"
+ "\x68\x12\x01\xA8\x91\x89\xB9\x83"
+ "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
+ "\x94\x5F\x18\x12\xBA\x27\x09\x39"
+ "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
+ "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
+ "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
+ "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
+ .rlen = 80,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef"
+ "\x00\x00\x00\x00",
+ .input = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .ilen = 192,
+ .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ .alen = 12,
+ .result = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
+ "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
+ "\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
+ "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82"
+ "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44"
+ "\x41\xA9\x82\x6F\x22\xA1\x23\x1A"
+ "\xA8\xE3\x16\xFD\x31\x5C\x27\x31"
+ "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1"
+ "\xCF\x07\x57\x41\x67\xD0\xC4\x42"
+ "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F"
+ "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B"
+ "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE"
+ "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C"
+ "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF"
+ "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59"
+ "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA"
+ "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25"
+ "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B"
+ "\x7E\x13\x06\x82\x08\x17\xA4\x35"
+ "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F"
+ "\xA3\x05\x38\x95\x20\x1A\x47\x04"
+ "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35"
+ "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E"
+ "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B"
+ "\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
+ "\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
+ .rlen = 208,
+ }
+};
+
+static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
+ { /* Generated using Crypto++ */
+ .key = zeroed_string,
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
+ "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
+ "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
+ "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
+ .ilen = 32,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = zeroed_string,
+ .rlen = 16,
+
+ },{
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
+ "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
+ "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
+ "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
+ .ilen = 32,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = zeroed_string,
+ .rlen = 16,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
+ "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+ "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
+ "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
+ .ilen = 32,
+ .assoc = zeroed_string,
+ .alen = 8,
+ .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .rlen = 16,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = zeroed_string,
+ .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
+ "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+ "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
+ "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
+ .ilen = 32,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .rlen = 16,
+
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
+ "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+ "\x64\x50\xF9\x32\x13\xFB\x74\x61"
+ "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
+ .ilen = 32,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .rlen = 16,
+ }, {
+ .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+ "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
+ "\x00\x00\x00\x00",
+ .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
+ "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+ "\x98\x14\xA1\x42\x37\x80\xFD\x90"
+ "\x68\x12\x01\xA8\x91\x89\xB9\x83"
+ "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
+ "\x94\x5F\x18\x12\xBA\x27\x09\x39"
+ "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
+ "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
+ "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
+ "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
+ .ilen = 80,
+ .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .alen = 8,
+ .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01"
+ "\x01\x01\x01\x01\x01\x01\x01\x01",
+ .rlen = 64,
+ }, {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x00\x00\x00\x00",
+ .klen = 20,
+ .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef"
+ "\x00\x00\x00\x00",
+ .input = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
+ "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
+ "\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
+ "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82"
+ "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44"
+ "\x41\xA9\x82\x6F\x22\xA1\x23\x1A"
+ "\xA8\xE3\x16\xFD\x31\x5C\x27\x31"
+ "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1"
+ "\xCF\x07\x57\x41\x67\xD0\xC4\x42"
+ "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F"
+ "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B"
+ "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE"
+ "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C"
+ "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF"
+ "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59"
+ "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA"
+ "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25"
+ "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B"
+ "\x7E\x13\x06\x82\x08\x17\xA4\x35"
+ "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F"
+ "\xA3\x05\x38\x95\x20\x1A\x47\x04"
+ "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35"
+ "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E"
+ "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B"
+ "\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
+ "\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
+ .ilen = 208,
+ .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa",
+ .alen = 12,
+ .result = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .rlen = 192,
+
+ }
+};
+
static struct aead_testvec aes_ccm_enc_tv_template[] = {
{ /* From RFC 3610 */
.key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
diff --git a/crypto/zlib.c b/crypto/zlib.c
index c3015733c990..739b8fca4cea 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -95,11 +95,10 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
zlib_comp_exit(ctx);
workspacesize = zlib_deflate_workspacesize();
- stream->workspace = vmalloc(workspacesize);
+ stream->workspace = vzalloc(workspacesize);
if (!stream->workspace)
return -ENOMEM;
- memset(stream->workspace, 0, workspacesize);
ret = zlib_deflateInit2(stream,
tb[ZLIB_COMP_LEVEL]
? nla_get_u32(tb[ZLIB_COMP_LEVEL])
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 794aacb715c1..d0387a84eec1 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -24,6 +24,7 @@
* warranty of any kind, whether express or implied.
*/
+#include <crypto/padlock.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/hw_random.h>
@@ -34,7 +35,6 @@
#include <asm/i387.h>
-#define PFX KBUILD_MODNAME ": "
enum {
@@ -81,8 +81,7 @@ static inline u32 xstore(u32 *addr, u32 edx_in)
ts_state = irq_ts_save();
asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
- :"=m"(*addr), "=a"(eax_out)
- :"D"(addr), "d"(edx_in));
+ : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr));
irq_ts_restore(ts_state);
return eax_out;
@@ -90,8 +89,10 @@ static inline u32 xstore(u32 *addr, u32 edx_in)
static int via_rng_data_present(struct hwrng *rng, int wait)
{
+ char buf[16 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
+ ((aligned(STACK_ALIGN)));
+ u32 *via_rng_datum = (u32 *)PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
u32 bytes_out;
- u32 *via_rng_datum = (u32 *)(&rng->priv);
int i;
/* We choose the recommended 1-byte-per-instruction RNG rate,
@@ -115,6 +116,7 @@ static int via_rng_data_present(struct hwrng *rng, int wait)
break;
udelay(10);
}
+ rng->priv = *via_rng_datum;
return bytes_out ? 1 : 0;
}
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 7d279e578df5..c99305afa58a 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -857,7 +857,7 @@ static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name,
printk(KERN_WARNING MV_CESA
"Base driver '%s' could not be loaded!\n",
base_hash_name);
- err = PTR_ERR(fallback_tfm);
+ err = PTR_ERR(base_hash);
goto err_bad_base;
}
}
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 76141262ea1d..80dc094e78c6 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1542,7 +1542,7 @@ out:
return err;
}
-static void __exit n2_unregister_algs(void)
+static void __devexit n2_unregister_algs(void)
{
mutex_lock(&spu_lock);
if (!--algs_registered)
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 799ca517c121..add2a1a72ba4 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -74,11 +74,9 @@
#define FLAGS_CBC BIT(1)
#define FLAGS_GIV BIT(2)
-#define FLAGS_NEW_KEY BIT(4)
-#define FLAGS_NEW_IV BIT(5)
-#define FLAGS_INIT BIT(6)
-#define FLAGS_FAST BIT(7)
-#define FLAGS_BUSY 8
+#define FLAGS_INIT BIT(4)
+#define FLAGS_FAST BIT(5)
+#define FLAGS_BUSY BIT(6)
struct omap_aes_ctx {
struct omap_aes_dev *dd;
@@ -98,19 +96,18 @@ struct omap_aes_reqctx {
struct omap_aes_dev {
struct list_head list;
unsigned long phys_base;
- void __iomem *io_base;
+ void __iomem *io_base;
struct clk *iclk;
struct omap_aes_ctx *ctx;
struct device *dev;
unsigned long flags;
+ int err;
- u32 *iv;
- u32 ctrl;
+ spinlock_t lock;
+ struct crypto_queue queue;
- spinlock_t lock;
- struct crypto_queue queue;
-
- struct tasklet_struct task;
+ struct tasklet_struct done_task;
+ struct tasklet_struct queue_task;
struct ablkcipher_request *req;
size_t total;
@@ -179,9 +176,13 @@ static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit)
static int omap_aes_hw_init(struct omap_aes_dev *dd)
{
- int err = 0;
-
+ /*
+ * clocks are enabled when request starts and disabled when finished.
+ * It may be long delays between requests.
+ * Device might go to off mode to save power.
+ */
clk_enable(dd->iclk);
+
if (!(dd->flags & FLAGS_INIT)) {
/* is it necessary to reset before every operation? */
omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET,
@@ -193,39 +194,26 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
__asm__ __volatile__("nop");
__asm__ __volatile__("nop");
- err = omap_aes_wait(dd, AES_REG_SYSSTATUS,
- AES_REG_SYSSTATUS_RESETDONE);
- if (!err)
- dd->flags |= FLAGS_INIT;
- }
+ if (omap_aes_wait(dd, AES_REG_SYSSTATUS,
+ AES_REG_SYSSTATUS_RESETDONE))
+ return -ETIMEDOUT;
- return err;
-}
+ dd->flags |= FLAGS_INIT;
+ dd->err = 0;
+ }
-static void omap_aes_hw_cleanup(struct omap_aes_dev *dd)
-{
- clk_disable(dd->iclk);
+ return 0;
}
-static void omap_aes_write_ctrl(struct omap_aes_dev *dd)
+static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
{
unsigned int key32;
- int i;
+ int i, err;
u32 val, mask;
- val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
- if (dd->flags & FLAGS_CBC)
- val |= AES_REG_CTRL_CBC;
- if (dd->flags & FLAGS_ENCRYPT)
- val |= AES_REG_CTRL_DIRECTION;
-
- if (dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) &&
- !(dd->ctx->flags & FLAGS_NEW_KEY))
- goto out;
-
- /* only need to write control registers for new settings */
-
- dd->ctrl = val;
+ err = omap_aes_hw_init(dd);
+ if (err)
+ return err;
val = 0;
if (dd->dma_lch_out >= 0)
@@ -237,30 +225,43 @@ static void omap_aes_write_ctrl(struct omap_aes_dev *dd)
omap_aes_write_mask(dd, AES_REG_MASK, val, mask);
- pr_debug("Set key\n");
key32 = dd->ctx->keylen / sizeof(u32);
- /* set a key */
+
+ /* it seems a key should always be set even if it has not changed */
for (i = 0; i < key32; i++) {
omap_aes_write(dd, AES_REG_KEY(i),
__le32_to_cpu(dd->ctx->key[i]));
}
- dd->ctx->flags &= ~FLAGS_NEW_KEY;
- if (dd->flags & FLAGS_NEW_IV) {
- pr_debug("Set IV\n");
- omap_aes_write_n(dd, AES_REG_IV(0), dd->iv, 4);
- dd->flags &= ~FLAGS_NEW_IV;
- }
+ if ((dd->flags & FLAGS_CBC) && dd->req->info)
+ omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4);
+
+ val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
+ if (dd->flags & FLAGS_CBC)
+ val |= AES_REG_CTRL_CBC;
+ if (dd->flags & FLAGS_ENCRYPT)
+ val |= AES_REG_CTRL_DIRECTION;
mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
AES_REG_CTRL_KEY_SIZE;
- omap_aes_write_mask(dd, AES_REG_CTRL, dd->ctrl, mask);
+ omap_aes_write_mask(dd, AES_REG_CTRL, val, mask);
-out:
- /* start DMA or disable idle mode */
- omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
- AES_REG_MASK_START);
+ /* IN */
+ omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
+ dd->phys_base + AES_REG_DATA, 0, 4);
+
+ omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+ omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+
+ /* OUT */
+ omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
+ dd->phys_base + AES_REG_DATA, 0, 4);
+
+ omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+ omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+
+ return 0;
}
static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
@@ -288,8 +289,16 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
{
struct omap_aes_dev *dd = data;
- if (lch == dd->dma_lch_out)
- tasklet_schedule(&dd->task);
+ if (ch_status != OMAP_DMA_BLOCK_IRQ) {
+ pr_err("omap-aes DMA error status: 0x%hx\n", ch_status);
+ dd->err = -EIO;
+ dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
+ } else if (lch == dd->dma_lch_in) {
+ return;
+ }
+
+ /* dma_lch_out - completed */
+ tasklet_schedule(&dd->done_task);
}
static int omap_aes_dma_init(struct omap_aes_dev *dd)
@@ -339,18 +348,6 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd)
goto err_dma_out;
}
- omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
- dd->phys_base + AES_REG_DATA, 0, 4);
-
- omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
- omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
-
- omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
- dd->phys_base + AES_REG_DATA, 0, 4);
-
- omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
- omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
-
return 0;
err_dma_out:
@@ -406,6 +403,11 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf,
if (!count)
return off;
+ /*
+ * buflen and total are AES_BLOCK_SIZE size aligned,
+ * so count should be also aligned
+ */
+
sg_copy_buf(buf + off, *sg, *offset, count, out);
off += count;
@@ -461,7 +463,9 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
omap_start_dma(dd->dma_lch_in);
omap_start_dma(dd->dma_lch_out);
- omap_aes_write_ctrl(dd);
+ /* start DMA or disable idle mode */
+ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
+ AES_REG_MASK_START);
return 0;
}
@@ -488,8 +492,10 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
count = min(dd->total, sg_dma_len(dd->in_sg));
count = min(count, sg_dma_len(dd->out_sg));
- if (count != dd->total)
+ if (count != dd->total) {
+ pr_err("request length != buffer length\n");
return -EINVAL;
+ }
pr_debug("fast\n");
@@ -525,23 +531,25 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
dd->total -= count;
- err = omap_aes_hw_init(dd);
-
err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count);
+ if (err) {
+ dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
+ dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
+ }
return err;
}
static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
{
- struct omap_aes_ctx *ctx;
+ struct ablkcipher_request *req = dd->req;
pr_debug("err: %d\n", err);
- ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req));
+ clk_disable(dd->iclk);
+ dd->flags &= ~FLAGS_BUSY;
- if (!dd->total)
- dd->req->base.complete(&dd->req->base, err);
+ req->base.complete(&req->base, err);
}
static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
@@ -553,8 +561,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START);
- omap_aes_hw_cleanup(dd);
-
omap_stop_dma(dd->dma_lch_in);
omap_stop_dma(dd->dma_lch_out);
@@ -574,40 +580,39 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
}
}
- if (err || !dd->total)
- omap_aes_finish_req(dd, err);
-
return err;
}
-static int omap_aes_handle_req(struct omap_aes_dev *dd)
+static int omap_aes_handle_queue(struct omap_aes_dev *dd,
+ struct ablkcipher_request *req)
{
struct crypto_async_request *async_req, *backlog;
struct omap_aes_ctx *ctx;
struct omap_aes_reqctx *rctx;
- struct ablkcipher_request *req;
unsigned long flags;
-
- if (dd->total)
- goto start;
+ int err, ret = 0;
spin_lock_irqsave(&dd->lock, flags);
+ if (req)
+ ret = ablkcipher_enqueue_request(&dd->queue, req);
+ if (dd->flags & FLAGS_BUSY) {
+ spin_unlock_irqrestore(&dd->lock, flags);
+ return ret;
+ }
backlog = crypto_get_backlog(&dd->queue);
async_req = crypto_dequeue_request(&dd->queue);
- if (!async_req)
- clear_bit(FLAGS_BUSY, &dd->flags);
+ if (async_req)
+ dd->flags |= FLAGS_BUSY;
spin_unlock_irqrestore(&dd->lock, flags);
if (!async_req)
- return 0;
+ return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
req = ablkcipher_request_cast(async_req);
- pr_debug("get new req\n");
-
/* assign new request to device */
dd->req = req;
dd->total = req->nbytes;
@@ -621,27 +626,22 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd)
rctx->mode &= FLAGS_MODE_MASK;
dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
- dd->iv = req->info;
- if ((dd->flags & FLAGS_CBC) && dd->iv)
- dd->flags |= FLAGS_NEW_IV;
- else
- dd->flags &= ~FLAGS_NEW_IV;
-
+ dd->ctx = ctx;
ctx->dd = dd;
- if (dd->ctx != ctx) {
- /* assign new context to device */
- dd->ctx = ctx;
- ctx->flags |= FLAGS_NEW_KEY;
- }
- if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
- pr_err("request size is not exact amount of AES blocks\n");
+ err = omap_aes_write_ctrl(dd);
+ if (!err)
+ err = omap_aes_crypt_dma_start(dd);
+ if (err) {
+ /* aes_task will not finish it, so do it here */
+ omap_aes_finish_req(dd, err);
+ tasklet_schedule(&dd->queue_task);
+ }
-start:
- return omap_aes_crypt_dma_start(dd);
+ return ret; /* return ret, which is enqueue return value */
}
-static void omap_aes_task(unsigned long data)
+static void omap_aes_done_task(unsigned long data)
{
struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
int err;
@@ -650,40 +650,50 @@ static void omap_aes_task(unsigned long data)
err = omap_aes_crypt_dma_stop(dd);
- err = omap_aes_handle_req(dd);
+ err = dd->err ? : err;
+
+ if (dd->total && !err) {
+ err = omap_aes_crypt_dma_start(dd);
+ if (!err)
+ return; /* DMA started. Not fininishing. */
+ }
+
+ omap_aes_finish_req(dd, err);
+ omap_aes_handle_queue(dd, NULL);
pr_debug("exit\n");
}
+static void omap_aes_queue_task(unsigned long data)
+{
+ struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
+
+ omap_aes_handle_queue(dd, NULL);
+}
+
static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
{
struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
struct omap_aes_dev *dd;
- unsigned long flags;
- int err;
pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
!!(mode & FLAGS_ENCRYPT),
!!(mode & FLAGS_CBC));
+ if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
+ pr_err("request size is not exact amount of AES blocks\n");
+ return -EINVAL;
+ }
+
dd = omap_aes_find_dev(ctx);
if (!dd)
return -ENODEV;
rctx->mode = mode;
- spin_lock_irqsave(&dd->lock, flags);
- err = ablkcipher_enqueue_request(&dd->queue, req);
- spin_unlock_irqrestore(&dd->lock, flags);
-
- if (!test_and_set_bit(FLAGS_BUSY, &dd->flags))
- omap_aes_handle_req(dd);
-
- pr_debug("exit\n");
-
- return err;
+ return omap_aes_handle_queue(dd, req);
}
/* ********************** ALG API ************************************ */
@@ -701,7 +711,6 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
memcpy(ctx->key, key, keylen);
ctx->keylen = keylen;
- ctx->flags |= FLAGS_NEW_KEY;
return 0;
}
@@ -750,7 +759,7 @@ static struct crypto_alg algs[] = {
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_aes_ctx),
- .cra_alignmask = 0,
+ .cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = omap_aes_cra_init,
@@ -770,7 +779,7 @@ static struct crypto_alg algs[] = {
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_aes_ctx),
- .cra_alignmask = 0,
+ .cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = omap_aes_cra_init,
@@ -849,7 +858,8 @@ static int omap_aes_probe(struct platform_device *pdev)
(reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR);
clk_disable(dd->iclk);
- tasklet_init(&dd->task, omap_aes_task, (unsigned long)dd);
+ tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd);
+ tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd);
err = omap_aes_dma_init(dd);
if (err)
@@ -876,7 +886,8 @@ err_algs:
crypto_unregister_alg(&algs[j]);
omap_aes_dma_cleanup(dd);
err_dma:
- tasklet_kill(&dd->task);
+ tasklet_kill(&dd->done_task);
+ tasklet_kill(&dd->queue_task);
iounmap(dd->io_base);
err_io:
clk_put(dd->iclk);
@@ -903,7 +914,8 @@ static int omap_aes_remove(struct platform_device *pdev)
for (i = 0; i < ARRAY_SIZE(algs); i++)
crypto_unregister_alg(&algs[i]);
- tasklet_kill(&dd->task);
+ tasklet_kill(&dd->done_task);
+ tasklet_kill(&dd->queue_task);
omap_aes_dma_cleanup(dd);
iounmap(dd->io_base);
clk_put(dd->iclk);
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index a081c7c7d03f..2e71123516e0 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -72,10 +72,9 @@
#define DEFAULT_TIMEOUT_INTERVAL HZ
-#define FLAGS_FIRST 0x0001
#define FLAGS_FINUP 0x0002
#define FLAGS_FINAL 0x0004
-#define FLAGS_FAST 0x0008
+#define FLAGS_SG 0x0008
#define FLAGS_SHA1 0x0010
#define FLAGS_DMA_ACTIVE 0x0020
#define FLAGS_OUTPUT_READY 0x0040
@@ -83,13 +82,17 @@
#define FLAGS_INIT 0x0100
#define FLAGS_CPU 0x0200
#define FLAGS_HMAC 0x0400
-
-/* 3rd byte */
-#define FLAGS_BUSY 16
+#define FLAGS_ERROR 0x0800
+#define FLAGS_BUSY 0x1000
#define OP_UPDATE 1
#define OP_FINAL 2
+#define OMAP_ALIGN_MASK (sizeof(u32)-1)
+#define OMAP_ALIGNED __attribute__((aligned(sizeof(u32))))
+
+#define BUFLEN PAGE_SIZE
+
struct omap_sham_dev;
struct omap_sham_reqctx {
@@ -97,8 +100,8 @@ struct omap_sham_reqctx {
unsigned long flags;
unsigned long op;
+ u8 digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED;
size_t digcnt;
- u8 *buffer;
size_t bufcnt;
size_t buflen;
dma_addr_t dma_addr;
@@ -107,6 +110,8 @@ struct omap_sham_reqctx {
struct scatterlist *sg;
unsigned int offset; /* offset in current sg */
unsigned int total; /* total request */
+
+ u8 buffer[0] OMAP_ALIGNED;
};
struct omap_sham_hmac_ctx {
@@ -136,6 +141,7 @@ struct omap_sham_dev {
int irq;
struct clk *iclk;
spinlock_t lock;
+ int err;
int dma;
int dma_lch;
struct tasklet_struct done_task;
@@ -194,53 +200,68 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit)
static void omap_sham_copy_hash(struct ahash_request *req, int out)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+ u32 *hash = (u32 *)ctx->digest;
+ int i;
+
+ /* MD5 is almost unused. So copy sha1 size to reduce code */
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
+ if (out)
+ hash[i] = omap_sham_read(ctx->dd,
+ SHA_REG_DIGEST(i));
+ else
+ omap_sham_write(ctx->dd,
+ SHA_REG_DIGEST(i), hash[i]);
+ }
+}
+
+static void omap_sham_copy_ready_hash(struct ahash_request *req)
+{
+ struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+ u32 *in = (u32 *)ctx->digest;
u32 *hash = (u32 *)req->result;
int i;
+ if (!hash)
+ return;
+
if (likely(ctx->flags & FLAGS_SHA1)) {
/* SHA1 results are in big endian */
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
- if (out)
- hash[i] = be32_to_cpu(omap_sham_read(ctx->dd,
- SHA_REG_DIGEST(i)));
- else
- omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
- cpu_to_be32(hash[i]));
+ hash[i] = be32_to_cpu(in[i]);
} else {
/* MD5 results are in little endian */
for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++)
- if (out)
- hash[i] = le32_to_cpu(omap_sham_read(ctx->dd,
- SHA_REG_DIGEST(i)));
- else
- omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
- cpu_to_le32(hash[i]));
+ hash[i] = le32_to_cpu(in[i]);
}
}
-static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
- int final, int dma)
+static int omap_sham_hw_init(struct omap_sham_dev *dd)
{
- struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
- u32 val = length << 5, mask;
+ clk_enable(dd->iclk);
- if (unlikely(!ctx->digcnt)) {
+ if (!(dd->flags & FLAGS_INIT)) {
+ omap_sham_write_mask(dd, SHA_REG_MASK,
+ SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
- clk_enable(dd->iclk);
+ if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
+ SHA_REG_SYSSTATUS_RESETDONE))
+ return -ETIMEDOUT;
- if (!(dd->flags & FLAGS_INIT)) {
- omap_sham_write_mask(dd, SHA_REG_MASK,
- SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
+ dd->flags |= FLAGS_INIT;
+ dd->err = 0;
+ }
- if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
- SHA_REG_SYSSTATUS_RESETDONE))
- return -ETIMEDOUT;
+ return 0;
+}
- dd->flags |= FLAGS_INIT;
- }
- } else {
+static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
+ int final, int dma)
+{
+ struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+ u32 val = length << 5, mask;
+
+ if (likely(ctx->digcnt))
omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt);
- }
omap_sham_write_mask(dd, SHA_REG_MASK,
SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0),
@@ -260,29 +281,26 @@ static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH;
omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask);
-
- return 0;
}
static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
size_t length, int final)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
- int err, count, len32;
+ int count, len32;
const u32 *buffer = (const u32 *)buf;
dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
ctx->digcnt, length, final);
- err = omap_sham_write_ctrl(dd, length, final, 0);
- if (err)
- return err;
+ omap_sham_write_ctrl(dd, length, final, 0);
+
+ /* should be non-zero before next lines to disable clocks later */
+ ctx->digcnt += length;
if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY))
return -ETIMEDOUT;
- ctx->digcnt += length;
-
if (final)
ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
@@ -298,16 +316,11 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
size_t length, int final)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
- int err, len32;
+ int len32;
dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
ctx->digcnt, length, final);
- /* flush cache entries related to our page */
- if (dma_addr == ctx->dma_addr)
- dma_sync_single_for_device(dd->dev, dma_addr, length,
- DMA_TO_DEVICE);
-
len32 = DIV_ROUND_UP(length, sizeof(u32));
omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
@@ -317,9 +330,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
dma_addr, 0, 0);
- err = omap_sham_write_ctrl(dd, length, final, 1);
- if (err)
- return err;
+ omap_sham_write_ctrl(dd, length, final, 1);
ctx->digcnt += length;
@@ -371,15 +382,29 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
return 0;
}
+static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
+ struct omap_sham_reqctx *ctx,
+ size_t length, int final)
+{
+ ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+ dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
+ return -EINVAL;
+ }
+
+ ctx->flags &= ~FLAGS_SG;
+
+ /* next call does not fail... so no unmap in the case of error */
+ return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final);
+}
+
static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
unsigned int final;
size_t count;
- if (!ctx->total)
- return 0;
-
omap_sham_append_sg(ctx);
final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
@@ -390,30 +415,68 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
count = ctx->bufcnt;
ctx->bufcnt = 0;
- return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final);
+ return omap_sham_xmit_dma_map(dd, ctx, count, final);
}
return 0;
}
-static int omap_sham_update_dma_fast(struct omap_sham_dev *dd)
+/* Start address alignment */
+#define SG_AA(sg) (IS_ALIGNED(sg->offset, sizeof(u32)))
+/* SHA1 block size alignment */
+#define SG_SA(sg) (IS_ALIGNED(sg->length, SHA1_MD5_BLOCK_SIZE))
+
+static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
- unsigned int length;
+ unsigned int length, final, tail;
+ struct scatterlist *sg;
- ctx->flags |= FLAGS_FAST;
+ if (!ctx->total)
+ return 0;
+
+ if (ctx->bufcnt || ctx->offset)
+ return omap_sham_update_dma_slow(dd);
+
+ dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+ ctx->digcnt, ctx->bufcnt, ctx->total);
+
+ sg = ctx->sg;
- length = min(ctx->total, sg_dma_len(ctx->sg));
- ctx->total = length;
+ if (!SG_AA(sg))
+ return omap_sham_update_dma_slow(dd);
+
+ if (!sg_is_last(sg) && !SG_SA(sg))
+ /* size is not SHA1_BLOCK_SIZE aligned */
+ return omap_sham_update_dma_slow(dd);
+
+ length = min(ctx->total, sg->length);
+
+ if (sg_is_last(sg)) {
+ if (!(ctx->flags & FLAGS_FINUP)) {
+ /* not last sg must be SHA1_MD5_BLOCK_SIZE aligned */
+ tail = length & (SHA1_MD5_BLOCK_SIZE - 1);
+ /* without finup() we need one block to close hash */
+ if (!tail)
+ tail = SHA1_MD5_BLOCK_SIZE;
+ length -= tail;
+ }
+ }
if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
dev_err(dd->dev, "dma_map_sg error\n");
return -EINVAL;
}
+ ctx->flags |= FLAGS_SG;
+
ctx->total -= length;
+ ctx->offset = length; /* offset where to start slow */
- return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1);
+ final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
+
+ /* next call does not fail... so no unmap in the case of error */
+ return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final);
}
static int omap_sham_update_cpu(struct omap_sham_dev *dd)
@@ -433,8 +496,17 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
omap_stop_dma(dd->dma_lch);
- if (ctx->flags & FLAGS_FAST)
+ if (ctx->flags & FLAGS_SG) {
dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+ if (ctx->sg->length == ctx->offset) {
+ ctx->sg = sg_next(ctx->sg);
+ if (ctx->sg)
+ ctx->offset = 0;
+ }
+ } else {
+ dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
+ DMA_TO_DEVICE);
+ }
return 0;
}
@@ -454,14 +526,7 @@ static void omap_sham_cleanup(struct ahash_request *req)
spin_unlock_irqrestore(&dd->lock, flags);
if (ctx->digcnt)
- clk_disable(dd->iclk);
-
- if (ctx->dma_addr)
- dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
- DMA_TO_DEVICE);
-
- if (ctx->buffer)
- free_page((unsigned long)ctx->buffer);
+ omap_sham_copy_ready_hash(req);
dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt);
}
@@ -489,8 +554,6 @@ static int omap_sham_init(struct ahash_request *req)
ctx->flags = 0;
- ctx->flags |= FLAGS_FIRST;
-
dev_dbg(dd->dev, "init: digest size: %d\n",
crypto_ahash_digestsize(tfm));
@@ -499,21 +562,7 @@ static int omap_sham_init(struct ahash_request *req)
ctx->bufcnt = 0;
ctx->digcnt = 0;
-
- ctx->buflen = PAGE_SIZE;
- ctx->buffer = (void *)__get_free_page(
- (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
- GFP_KERNEL : GFP_ATOMIC);
- if (!ctx->buffer)
- return -ENOMEM;
-
- ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
- DMA_TO_DEVICE);
- if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
- dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
- free_page((unsigned long)ctx->buffer);
- return -EINVAL;
- }
+ ctx->buflen = BUFLEN;
if (tctx->flags & FLAGS_HMAC) {
struct omap_sham_hmac_ctx *bctx = tctx->base;
@@ -538,10 +587,8 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
if (ctx->flags & FLAGS_CPU)
err = omap_sham_update_cpu(dd);
- else if (ctx->flags & FLAGS_FAST)
- err = omap_sham_update_dma_fast(dd);
else
- err = omap_sham_update_dma_slow(dd);
+ err = omap_sham_update_dma_start(dd);
/* wait for dma completion before can take more data */
dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
@@ -560,15 +607,12 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
use_dma = 0;
if (use_dma)
- err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1);
+ err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1);
else
err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
ctx->bufcnt = 0;
- if (err != -EINPROGRESS)
- omap_sham_cleanup(req);
-
dev_dbg(dd->dev, "final_req: err: %d\n", err);
return err;
@@ -576,6 +620,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
static int omap_sham_finish_req_hmac(struct ahash_request *req)
{
+ struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
struct omap_sham_hmac_ctx *bctx = tctx->base;
int bs = crypto_shash_blocksize(bctx->shash);
@@ -590,48 +635,56 @@ static int omap_sham_finish_req_hmac(struct ahash_request *req)
return crypto_shash_init(&desc.shash) ?:
crypto_shash_update(&desc.shash, bctx->opad, bs) ?:
- crypto_shash_finup(&desc.shash, req->result, ds, req->result);
+ crypto_shash_finup(&desc.shash, ctx->digest, ds, ctx->digest);
}
static void omap_sham_finish_req(struct ahash_request *req, int err)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+ struct omap_sham_dev *dd = ctx->dd;
if (!err) {
omap_sham_copy_hash(ctx->dd->req, 1);
if (ctx->flags & FLAGS_HMAC)
err = omap_sham_finish_req_hmac(req);
+ } else {
+ ctx->flags |= FLAGS_ERROR;
}
- if (ctx->flags & FLAGS_FINAL)
+ if ((ctx->flags & FLAGS_FINAL) || err)
omap_sham_cleanup(req);
- clear_bit(FLAGS_BUSY, &ctx->dd->flags);
+ clk_disable(dd->iclk);
+ dd->flags &= ~FLAGS_BUSY;
if (req->base.complete)
req->base.complete(&req->base, err);
}
-static int omap_sham_handle_queue(struct omap_sham_dev *dd)
+static int omap_sham_handle_queue(struct omap_sham_dev *dd,
+ struct ahash_request *req)
{
struct crypto_async_request *async_req, *backlog;
struct omap_sham_reqctx *ctx;
- struct ahash_request *req, *prev_req;
+ struct ahash_request *prev_req;
unsigned long flags;
- int err = 0;
-
- if (test_and_set_bit(FLAGS_BUSY, &dd->flags))
- return 0;
+ int err = 0, ret = 0;
spin_lock_irqsave(&dd->lock, flags);
+ if (req)
+ ret = ahash_enqueue_request(&dd->queue, req);
+ if (dd->flags & FLAGS_BUSY) {
+ spin_unlock_irqrestore(&dd->lock, flags);
+ return ret;
+ }
backlog = crypto_get_backlog(&dd->queue);
async_req = crypto_dequeue_request(&dd->queue);
- if (!async_req)
- clear_bit(FLAGS_BUSY, &dd->flags);
+ if (async_req)
+ dd->flags |= FLAGS_BUSY;
spin_unlock_irqrestore(&dd->lock, flags);
if (!async_req)
- return 0;
+ return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
@@ -646,7 +699,22 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
ctx->op, req->nbytes);
- if (req != prev_req && ctx->digcnt)
+
+ err = omap_sham_hw_init(dd);
+ if (err)
+ goto err1;
+
+ omap_set_dma_dest_params(dd->dma_lch, 0,
+ OMAP_DMA_AMODE_CONSTANT,
+ dd->phys_base + SHA_REG_DIN(0), 0, 16);
+
+ omap_set_dma_dest_burst_mode(dd->dma_lch,
+ OMAP_DMA_DATA_BURST_16);
+
+ omap_set_dma_src_burst_mode(dd->dma_lch,
+ OMAP_DMA_DATA_BURST_4);
+
+ if (ctx->digcnt)
/* request has changed - restore hash */
omap_sham_copy_hash(req, 0);
@@ -658,7 +726,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
} else if (ctx->op == OP_FINAL) {
err = omap_sham_final_req(dd);
}
-
+err1:
if (err != -EINPROGRESS) {
/* done_task will not finish it, so do it here */
omap_sham_finish_req(req, err);
@@ -667,7 +735,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
dev_dbg(dd->dev, "exit, err: %d\n", err);
- return err;
+ return ret;
}
static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
@@ -675,18 +743,10 @@ static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
struct omap_sham_dev *dd = tctx->dd;
- unsigned long flags;
- int err;
ctx->op = op;
- spin_lock_irqsave(&dd->lock, flags);
- err = ahash_enqueue_request(&dd->queue, req);
- spin_unlock_irqrestore(&dd->lock, flags);
-
- omap_sham_handle_queue(dd);
-
- return err;
+ return omap_sham_handle_queue(dd, req);
}
static int omap_sham_update(struct ahash_request *req)
@@ -709,21 +769,13 @@ static int omap_sham_update(struct ahash_request *req)
*/
omap_sham_append_sg(ctx);
return 0;
- } else if (ctx->bufcnt + ctx->total <= 64) {
+ } else if (ctx->bufcnt + ctx->total <= SHA1_MD5_BLOCK_SIZE) {
+ /*
+ * faster to use CPU for short transfers
+ */
ctx->flags |= FLAGS_CPU;
- } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) {
- /* may be can use faster functions */
- int aligned = IS_ALIGNED((u32)ctx->sg->offset,
- sizeof(u32));
-
- if (aligned && (ctx->flags & FLAGS_FIRST))
- /* digest: first and final */
- ctx->flags |= FLAGS_FAST;
-
- ctx->flags &= ~FLAGS_FIRST;
}
- } else if (ctx->bufcnt + ctx->total <= ctx->buflen) {
- /* if not finaup -> not fast */
+ } else if (ctx->bufcnt + ctx->total < ctx->buflen) {
omap_sham_append_sg(ctx);
return 0;
}
@@ -761,12 +813,14 @@ static int omap_sham_final(struct ahash_request *req)
ctx->flags |= FLAGS_FINUP;
- /* OMAP HW accel works only with buffers >= 9 */
- /* HMAC is always >= 9 because of ipad */
- if ((ctx->digcnt + ctx->bufcnt) < 9)
- err = omap_sham_final_shash(req);
- else if (ctx->bufcnt)
- return omap_sham_enqueue(req, OP_FINAL);
+ if (!(ctx->flags & FLAGS_ERROR)) {
+ /* OMAP HW accel works only with buffers >= 9 */
+ /* HMAC is always >= 9 because of ipad */
+ if ((ctx->digcnt + ctx->bufcnt) < 9)
+ err = omap_sham_final_shash(req);
+ else if (ctx->bufcnt)
+ return omap_sham_enqueue(req, OP_FINAL);
+ }
omap_sham_cleanup(req);
@@ -836,6 +890,8 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
const char *alg_name = crypto_tfm_alg_name(tfm);
+ pr_info("enter\n");
+
/* Allocate a fallback and abort if it failed. */
tctx->fallback = crypto_alloc_shash(alg_name, 0,
CRYPTO_ALG_NEED_FALLBACK);
@@ -846,7 +902,7 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
}
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct omap_sham_reqctx));
+ sizeof(struct omap_sham_reqctx) + BUFLEN);
if (alg_base) {
struct omap_sham_hmac_ctx *bctx = tctx->base;
@@ -932,7 +988,7 @@ static struct ahash_alg algs[] = {
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx),
- .cra_alignmask = 0,
+ .cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_init,
.cra_exit = omap_sham_cra_exit,
@@ -956,7 +1012,7 @@ static struct ahash_alg algs[] = {
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx) +
sizeof(struct omap_sham_hmac_ctx),
- .cra_alignmask = 0,
+ .cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_sha1_init,
.cra_exit = omap_sham_cra_exit,
@@ -980,7 +1036,7 @@ static struct ahash_alg algs[] = {
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx) +
sizeof(struct omap_sham_hmac_ctx),
- .cra_alignmask = 0,
+ .cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_md5_init,
.cra_exit = omap_sham_cra_exit,
@@ -993,7 +1049,7 @@ static void omap_sham_done_task(unsigned long data)
struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
struct ahash_request *req = dd->req;
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
- int ready = 1;
+ int ready = 0, err = 0;
if (ctx->flags & FLAGS_OUTPUT_READY) {
ctx->flags &= ~FLAGS_OUTPUT_READY;
@@ -1003,15 +1059,18 @@ static void omap_sham_done_task(unsigned long data)
if (dd->flags & FLAGS_DMA_ACTIVE) {
dd->flags &= ~FLAGS_DMA_ACTIVE;
omap_sham_update_dma_stop(dd);
- omap_sham_update_dma_slow(dd);
+ if (!dd->err)
+ err = omap_sham_update_dma_start(dd);
}
- if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) {
- dev_dbg(dd->dev, "update done\n");
+ err = dd->err ? : err;
+
+ if (err != -EINPROGRESS && (ready || err)) {
+ dev_dbg(dd->dev, "update done: err: %d\n", err);
/* finish curent request */
- omap_sham_finish_req(req, 0);
+ omap_sham_finish_req(req, err);
/* start new request */
- omap_sham_handle_queue(dd);
+ omap_sham_handle_queue(dd, NULL);
}
}
@@ -1019,7 +1078,7 @@ static void omap_sham_queue_task(unsigned long data)
{
struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
- omap_sham_handle_queue(dd);
+ omap_sham_handle_queue(dd, NULL);
}
static irqreturn_t omap_sham_irq(int irq, void *dev_id)
@@ -1041,6 +1100,7 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id)
omap_sham_read(dd, SHA_REG_CTRL);
ctx->flags |= FLAGS_OUTPUT_READY;
+ dd->err = 0;
tasklet_schedule(&dd->done_task);
return IRQ_HANDLED;
@@ -1050,8 +1110,13 @@ static void omap_sham_dma_callback(int lch, u16 ch_status, void *data)
{
struct omap_sham_dev *dd = data;
- if (likely(lch == dd->dma_lch))
- tasklet_schedule(&dd->done_task);
+ if (ch_status != OMAP_DMA_BLOCK_IRQ) {
+ pr_err("omap-sham DMA error status: 0x%hx\n", ch_status);
+ dd->err = -EIO;
+ dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
+ }
+
+ tasklet_schedule(&dd->done_task);
}
static int omap_sham_dma_init(struct omap_sham_dev *dd)
@@ -1066,15 +1131,6 @@ static int omap_sham_dma_init(struct omap_sham_dev *dd)
dev_err(dd->dev, "Unable to request DMA channel\n");
return err;
}
- omap_set_dma_dest_params(dd->dma_lch, 0,
- OMAP_DMA_AMODE_CONSTANT,
- dd->phys_base + SHA_REG_DIN(0), 0, 16);
-
- omap_set_dma_dest_burst_mode(dd->dma_lch,
- OMAP_DMA_DATA_BURST_16);
-
- omap_set_dma_src_burst_mode(dd->dma_lch,
- OMAP_DMA_DATA_BURST_4);
return 0;
}
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 8a515baa38f7..db33d300aa23 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -9,6 +9,7 @@
#include <crypto/algapi.h>
#include <crypto/aes.h>
+#include <crypto/padlock.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -21,7 +22,6 @@
#include <asm/byteorder.h>
#include <asm/processor.h>
#include <asm/i387.h>
-#include "padlock.h"
/*
* Number of data blocks actually fetched for each xcrypt insn.
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index d3a27e0119bc..adf075b6b9a8 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -13,6 +13,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/padlock.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -22,13 +23,6 @@
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/i387.h>
-#include "padlock.h"
-
-#ifdef CONFIG_64BIT
-#define STACK_ALIGN 16
-#else
-#define STACK_ALIGN 4
-#endif
struct padlock_sha_desc {
struct shash_desc fallback;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
new file mode 100644
index 000000000000..c5813c87de06
--- /dev/null
+++ b/include/crypto/if_alg.h
@@ -0,0 +1,92 @@
+/*
+ * if_alg: User-space algorithm interface
+ *
+ * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_IF_ALG_H
+#define _CRYPTO_IF_ALG_H
+
+#include <linux/compiler.h>
+#include <linux/completion.h>
+#include <linux/if_alg.h>
+#include <linux/types.h>
+#include <net/sock.h>
+
+#define ALG_MAX_PAGES 16
+
+struct crypto_async_request;
+
+struct alg_sock {
+ /* struct sock must be the first member of struct alg_sock */
+ struct sock sk;
+
+ struct sock *parent;
+
+ const struct af_alg_type *type;
+ void *private;
+};
+
+struct af_alg_completion {
+ struct completion completion;
+ int err;
+};
+
+struct af_alg_control {
+ struct af_alg_iv *iv;
+ int op;
+};
+
+struct af_alg_type {
+ void *(*bind)(const char *name, u32 type, u32 mask);
+ void (*release)(void *private);
+ int (*setkey)(void *private, const u8 *key, unsigned int keylen);
+ int (*accept)(void *private, struct sock *sk);
+
+ struct proto_ops *ops;
+ struct module *owner;
+ char name[14];
+};
+
+struct af_alg_sgl {
+ struct scatterlist sg[ALG_MAX_PAGES];
+ struct page *pages[ALG_MAX_PAGES];
+};
+
+int af_alg_register_type(const struct af_alg_type *type);
+int af_alg_unregister_type(const struct af_alg_type *type);
+
+int af_alg_release(struct socket *sock);
+int af_alg_accept(struct sock *sk, struct socket *newsock);
+
+int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len,
+ int write);
+void af_alg_free_sg(struct af_alg_sgl *sgl);
+
+int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con);
+
+int af_alg_wait_for_completion(int err, struct af_alg_completion *completion);
+void af_alg_complete(struct crypto_async_request *req, int err);
+
+static inline struct alg_sock *alg_sk(struct sock *sk)
+{
+ return (struct alg_sock *)sk;
+}
+
+static inline void af_alg_release_parent(struct sock *sk)
+{
+ sock_put(alg_sk(sk)->parent);
+}
+
+static inline void af_alg_init_completion(struct af_alg_completion *completion)
+{
+ init_completion(&completion->completion);
+}
+
+#endif /* _CRYPTO_IF_ALG_H */
diff --git a/drivers/crypto/padlock.h b/include/crypto/padlock.h
index b728e4518bd1..d2cfa2ef49e8 100644
--- a/drivers/crypto/padlock.h
+++ b/include/crypto/padlock.h
@@ -15,9 +15,15 @@
#define PADLOCK_ALIGNMENT 16
-#define PFX "padlock: "
+#define PFX KBUILD_MODNAME ": "
#define PADLOCK_CRA_PRIORITY 300
#define PADLOCK_COMPOSITE_PRIORITY 400
+#ifdef CONFIG_64BIT
+#define STACK_ALIGN 16
+#else
+#define STACK_ALIGN 4
+#endif
+
#endif /* _CRYPTO_PADLOCK_H */
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 833d208c25d6..4fd95a323beb 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -68,6 +68,21 @@ static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg)
return (++sg)->length ? sg : (void *)sg_page(sg);
}
+static inline void scatterwalk_crypto_chain(struct scatterlist *head,
+ struct scatterlist *sg,
+ int chain, int num)
+{
+ if (chain) {
+ head->length += sg->length;
+ sg = scatterwalk_sg_next(sg);
+ }
+
+ if (sg)
+ scatterwalk_sg_chain(head, num, sg);
+ else
+ sg_mark_end(head);
+}
+
static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
struct scatter_walk *walk_out)
{
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index d1580c17cab3..2296d8b1931f 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -158,6 +158,7 @@ header-y += icmpv6.h
header-y += if.h
header-y += if_addr.h
header-y += if_addrlabel.h
+header-y += if_alg.h
header-y += if_arcnet.h
header-y += if_arp.h
header-y += if_bonding.h
diff --git a/include/linux/if_alg.h b/include/linux/if_alg.h
new file mode 100644
index 000000000000..0f9acce5b1ff
--- /dev/null
+++ b/include/linux/if_alg.h
@@ -0,0 +1,40 @@
+/*
+ * if_alg: User-space algorithm interface
+ *
+ * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _LINUX_IF_ALG_H
+#define _LINUX_IF_ALG_H
+
+#include <linux/types.h>
+
+struct sockaddr_alg {
+ __u16 salg_family;
+ __u8 salg_type[14];
+ __u32 salg_feat;
+ __u32 salg_mask;
+ __u8 salg_name[64];
+};
+
+struct af_alg_iv {
+ __u32 ivlen;
+ __u8 iv[0];
+};
+
+/* Socket options */
+#define ALG_SET_KEY 1
+#define ALG_SET_IV 2
+#define ALG_SET_OP 3
+
+/* Operations */
+#define ALG_OP_DECRYPT 0
+#define ALG_OP_ENCRYPT 1
+
+#endif /* _LINUX_IF_ALG_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 5f65f14c4f44..edbb1d07ddf4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -191,7 +191,8 @@ struct ucred {
#define AF_PHONET 35 /* Phonet sockets */
#define AF_IEEE802154 36 /* IEEE802154 sockets */
#define AF_CAIF 37 /* CAIF sockets */
-#define AF_MAX 38 /* For now.. */
+#define AF_ALG 38 /* Algorithm sockets */
+#define AF_MAX 39 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -232,6 +233,7 @@ struct ucred {
#define PF_PHONET AF_PHONET
#define PF_IEEE802154 AF_IEEE802154
#define PF_CAIF AF_CAIF
+#define PF_ALG AF_ALG
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
@@ -305,6 +307,7 @@ struct ucred {
#define SOL_RDS 276
#define SOL_IUCV 277
#define SOL_CAIF 278
+#define SOL_ALG 279
/* IPX options */
#define IPX_TYPE 1
diff --git a/net/core/sock.c b/net/core/sock.c
index a658aeb6d554..7dfed792434d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
- "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
+ "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
- "slock-AF_IEEE802154", "slock-AF_CAIF" ,
+ "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
- "clock-AF_IEEE802154", "clock-AF_CAIF" ,
+ "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_MAX"
};