summary refs log tree commit diff stats
path: root/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm
diff options
context:
space:
mode:
Diffstat (limited to 'CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm')
-rw-r--r-- CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm 4708
1 files changed, 4708 insertions, 0 deletions
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm
new file mode 100644
index 0000000000..b2a9c65f5d
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/aes/aesni-sha256-x86_64.nasm
@@ -0,0 +1,4708 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+EXTERN  OPENSSL_ia32cap_P
+global  aesni_cbc_sha256_enc
+; aesni_cbc_sha256_enc: Win64 entry point that only picks an implementation from the OPENSSL_ia32cap_P capability words.
+ALIGN   16
+aesni_cbc_sha256_enc:
+; Probe convention: a zero first argument (rcx) makes the routine return eax=1 without touching any data.
+        lea     r11, [OPENSSL_ia32cap_P]
+        mov     eax, 1                          ; answer for the probe call
+        cmp     rcx, 0
+        je      near $L$probe
+        mov     eax, dword [r11]                ; capability dword 0
+        mov     r10, qword [r11 + 4]            ; capability dwords 1 (low half) and 2 (high half)
+        bt      r10, 61                         ; dword 2, bit 29
+        jc      near aesni_cbc_sha256_enc_shaext
+        mov     r11, r10
+        shr     r11, 32                         ; r11d = capability dword 2
+; SIMD code paths, tried in this order: XOP, then AVX2 (all three masked bits required), then AVX.
+        test    r10d, 1 << 11                   ; dword 1, bit 11
+        jnz     near aesni_cbc_sha256_enc_xop
+        and     r11d, 0x128                     ; dword 2, bits 3, 5 and 8
+        cmp     r11d, 0x128
+        je      near aesni_cbc_sha256_enc_avx2
+        and     r10d, 1 << 28                   ; dword 1, bit 28
+        jnz     near aesni_cbc_sha256_enc_avx
+        ud2                                     ; none of the paths is available: trap
+        xor     eax, eax                        ; never executed: follows ud2 and carries no label
+        cmp     rcx, 0
+        je      near $L$probe
+        ud2
+$L$probe:
+        db      0xF3, 0xC3                      ; repret
+
+
+
+ALIGN 64
+; K256: SHA-256 round constants. Each 16-byte row of four constants is emitted twice, so consecutive rows of four are 32 bytes apart (512 bytes in total).
+K256:
+ DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+; K256+512: vpshufb control that reverses the four bytes of every dword (also emitted twice). Its top byte is 0x00, which the round loops test as their end marker.
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+ DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1 ; K256+544: select masks; three consecutive 16-byte windows are loaded from K256+544+index*8, and exactly one of them is all-ones when index*8 is 0, 16 or 32
+ DD 0,0,0,0,0,0,0,0 ; zero padding read by the upper windows
+DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54 ; ASCII banner: "AESNI-CBC+SHA256"
+DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95 ; " stitch for x86_"
+DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98 ; "64, CRYPTOGAMS b"
+DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108 ; "y <appro@openssl"
+DB 46,111,114,103,62,0 ; ".org>" and the terminating NUL
+ALIGN 64
+
+ALIGN 64
+aesni_cbc_sha256_enc_xop: ; XOP code path: AES-CBC encryption interleaved with SHA-256 hashing; Win64 entry taking seven arguments
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi and rsi are parked in the caller-provided home slots
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_sha256_enc_xop:
+ mov rdi,rcx ; Win64 argument registers rcx,rdx,r8,r9 are moved to rdi,rsi,rdx,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp] ; fifth argument, from the stack
+ mov r9,QWORD[48+rsp] ; sixth argument, from the stack
+; Argument roles as used by the code that follows (r10 receives the seventh just below):
+; rdi = AES input, rsi = AES output, rdx = number of 64-byte blocks, rcx = AES key schedule,
+; r8 = 16-byte chaining value (IV), r9 = eight-dword hash state, r10 = data to be hashed.
+$L$xop_shortcut:
+ mov r10,QWORD[56+rsp] ; seventh argument, from the stack
+ mov rax,rsp ; entry stack pointer; stored in the frame and used to unwind at the end
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ sub rsp,288 ; frame: 64 bytes of X+K scratch, 64 bytes of saved pointers, 160 bytes for xmm6-xmm15
+ and rsp,-64 ; align the frame to 64 bytes
+
+ shl rdx,6 ; block count -> byte count (64 bytes per block)
+ sub rsi,rdi ; rsi = output - input, so one running pointer addresses both buffers
+ sub r10,rdi ; r10 = hashed data - input, for the same reason
+ add rdx,rdi ; rdx = end of the input
+
+
+ mov QWORD[((64+8))+rsp],rsi ; frame+72: output - input
+ mov QWORD[((64+16))+rsp],rdx ; frame+80: end of input
+
+ mov QWORD[((64+32))+rsp],r8 ; frame+96: chaining-value pointer
+ mov QWORD[((64+40))+rsp],r9 ; frame+104: hash state pointer
+ mov QWORD[((64+48))+rsp],r10 ; frame+112: hashed data - input
+ mov QWORD[120+rsp],rax ; frame+120: entry stack pointer
+
+ movaps XMMWORD[128+rsp],xmm6 ; xmm6-xmm15 are saved before the vector code overwrites them
+ movaps XMMWORD[144+rsp],xmm7
+ movaps XMMWORD[160+rsp],xmm8
+ movaps XMMWORD[176+rsp],xmm9
+ movaps XMMWORD[192+rsp],xmm10
+ movaps XMMWORD[208+rsp],xmm11
+ movaps XMMWORD[224+rsp],xmm12
+ movaps XMMWORD[240+rsp],xmm13
+ movaps XMMWORD[256+rsp],xmm14
+ movaps XMMWORD[272+rsp],xmm15
+$L$prologue_xop:
+ vzeroall ; clear every vector register (xmm6-xmm15 are already saved in the frame)
+
+ mov r12,rdi ; r12 = running input pointer
+ lea rdi,[128+rcx] ; rdi = key schedule + 128; round key n is addressed as [n*16-128+rdi]
+ lea r13,[((K256+544))] ; select-mask area that follows the constants
+ mov r14d,DWORD[((240-128))+rdi] ; dword at key schedule + 240, used below to pick the final AES round
+ mov r15,r9 ; r15 = hash state pointer
+ mov rsi,r10 ; rsi = hashed data - input
+ vmovdqu xmm8,XMMWORD[r8] ; xmm8 = chaining value (IV)
+ sub r14,9 ; mask index. NOTE(review): the aligned loads below need index*8 to be a multiple of 16, so the field presumably holds 9/11/13 - confirm against the key-setup code
+
+ mov eax,DWORD[r15] ; the eight state words a..h live in eax,ebx,ecx,edx,r8d,r9d,r10d,r11d
+ mov ebx,DWORD[4+r15]
+ mov ecx,DWORD[8+r15]
+ mov edx,DWORD[12+r15]
+ mov r8d,DWORD[16+r15]
+ mov r9d,DWORD[20+r15]
+ mov r10d,DWORD[24+r15]
+ mov r11d,DWORD[28+r15]
+
+ vmovdqa xmm14,XMMWORD[r14*8+r13] ; mask applied to the third vaesenclast result
+ vmovdqa xmm13,XMMWORD[16+r14*8+r13] ; mask applied to the second vaesenclast result
+ vmovdqa xmm12,XMMWORD[32+r14*8+r13] ; mask applied to the first vaesenclast result
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi] ; round key 0
+ jmp NEAR $L$loop_xop
+ALIGN 16
+$L$loop_xop: ; executed once per 64-byte block: load the data to hash and seed rounds 0-15
+ vmovdqa xmm7,XMMWORD[((K256+512))] ; byte-reversal shuffle control
+ vmovdqu xmm0,XMMWORD[r12*1+rsi] ; 64 bytes of data to hash -> xmm0..xmm3 (message words X[0..15])
+ vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
+ vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
+ vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
+ vpshufb xmm0,xmm0,xmm7 ; reverse the bytes of every dword
+ lea rbp,[K256] ; rbp walks the round-constant table
+ vpshufb xmm1,xmm1,xmm7
+ vpshufb xmm2,xmm2,xmm7
+ vpaddd xmm4,xmm0,XMMWORD[rbp] ; X[0..3] + constants; rows of four constants are 32 bytes apart
+ vpshufb xmm3,xmm3,xmm7
+ vpaddd xmm5,xmm1,XMMWORD[32+rbp]
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ vpaddd xmm7,xmm3,XMMWORD[96+rbp]
+ vmovdqa XMMWORD[rsp],xmm4 ; X+K for rounds 0-15 is kept at [rsp] .. [rsp+63]
+ mov r14d,eax ; r14d carries the pending value of a into the first round
+ vmovdqa XMMWORD[16+rsp],xmm5
+ mov esi,ebx
+ vmovdqa XMMWORD[32+rsp],xmm6
+ xor esi,ecx ; esi = b^c, the first round's Maj input
+ vmovdqa XMMWORD[48+rsp],xmm7
+ mov r13d,r8d ; r13d = e, the first round's Sigma1 input
+ jmp NEAR $L$xop_00_47
+
+ALIGN 16
+$L$xop_00_47: ; one pass = 16 SHA-256 rounds + message-schedule expansion for the next 16 + one 16-byte AES-CBC block
+ sub rbp,-16*2*4 ; advance the constant pointer by four 32-byte rows (128 bytes)
+ vmovdqu xmm9,XMMWORD[r12] ; next 16-byte AES input block
+ mov QWORD[((64+0))+rsp],r12 ; park the input pointer: r12d is round scratch from here on
+ vpalignr xmm4,xmm1,xmm0,4 ; xmm4 = X[1..4]
+ ror r13d,14 ; Sigma1(e) is built in r13d: rotate 14, xor e, rotate 5, xor e, rotate 6
+ mov eax,r14d ; a = value left pending in r14d
+ vpalignr xmm7,xmm3,xmm2,4 ; xmm7 = X[9..12]
+ mov r12d,r9d
+ xor r13d,r8d
+DB 143,232,120,194,236,14 ; vprotd xmm5,xmm4,14 (XOP rotate-left, emitted as raw bytes)
+ ror r14d,9 ; Sigma0(a) is built in r14d: rotate 9, xor a, rotate 11, xor a, rotate 2
+ xor r12d,r10d
+ vpsrld xmm4,xmm4,3
+ ror r13d,5
+ xor r14d,eax
+ vpaddd xmm0,xmm0,xmm7 ; X[0..3] += X[9..12]
+ and r12d,r8d
+ vpxor xmm9,xmm9,xmm10 ; AES: xor in round key 0
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi] ; fetch round key 1
+ xor r13d,r8d
+ add r11d,DWORD[rsp] ; h += X+K for this round
+ mov r15d,eax
+DB 143,232,120,194,245,11 ; vprotd xmm6,xmm5,11
+ ror r14d,11
+ xor r12d,r10d ; r12d = ((f^g)&e)^g = Ch(e,f,g)
+ vpxor xmm4,xmm4,xmm5
+ xor r15d,ebx
+ ror r13d,6
+ add r11d,r12d ; h += Ch
+ and esi,r15d
+DB 143,232,120,194,251,13 ; vprotd xmm7,xmm3,13
+ xor r14d,eax
+ add r11d,r13d ; h += Sigma1(e)
+ vpxor xmm4,xmm4,xmm6 ; xmm4 = sigma0(X[1..4]) (shift 3, rotate-left 14 and 25)
+ xor esi,ebx ; esi = ((a^b)&(b^c))^b = Maj(a,b,c)
+ add edx,r11d ; d += h
+ vpsrld xmm6,xmm3,10
+ ror r14d,2
+ add r11d,esi ; h += Maj
+ vpaddd xmm0,xmm0,xmm4 ; X[0..3] += sigma0
+ mov r13d,edx ; e for the next round
+ add r14d,r11d ; r14d = h + Sigma0(a): the next round's a, moved into place when that round starts
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r13d,14
+ mov r11d,r14d ; second round: the register roles rotate by one (a is now r11d, e is edx)
+ vpxor xmm7,xmm7,xmm6
+ mov r12d,r8d
+ xor r13d,edx
+ ror r14d,9
+ xor r12d,r9d
+ vpxor xmm7,xmm7,xmm5 ; xmm7 = sigma1(X[12..15]) (shift 10, rotate-left 13 and 15)
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vpxor xmm9,xmm9,xmm8 ; AES-CBC: xor in the previous ciphertext block (the IV for the first block)
+ xor r13d,edx
+ vpsrldq xmm7,xmm7,8 ; keep the values for X[14],X[15] in the low two lanes
+ add r10d,DWORD[4+rsp]
+ mov esi,r11d
+ ror r14d,11
+ xor r12d,r9d
+ vpaddd xmm0,xmm0,xmm7 ; the low two lanes of xmm0 are now the new X[16],X[17]
+ xor esi,eax
+ ror r13d,6
+ add r10d,r12d
+ and r15d,esi
+DB 143,232,120,194,248,13 ; vprotd xmm7,xmm0,13
+ xor r14d,r11d
+ add r10d,r13d
+ vpsrld xmm6,xmm0,10
+ xor r15d,eax
+ add ecx,r10d
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r14d,2
+ add r10d,r15d
+ vpxor xmm7,xmm7,xmm6
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ vpxor xmm7,xmm7,xmm5 ; sigma1 of the freshly computed low lanes
+ mov r12d,edx
+ xor r13d,ecx
+ ror r14d,9
+ xor r12d,r8d
+ vpslldq xmm7,xmm7,8 ; move those two values to the high lanes
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 1
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r13d,ecx
+ vpaddd xmm0,xmm0,xmm7 ; xmm0 = X[16..19] complete
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ ror r14d,11
+ xor r12d,r8d
+ vpaddd xmm6,xmm0,XMMWORD[rbp] ; X+K for the matching rounds of the next pass
+ xor r15d,r11d
+ ror r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ ror r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ ror r14d,9
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 2
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov esi,r9d
+ ror r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ ror r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ ror r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[rsp],xmm6 ; first X+K slot consumed: refill it; the same pattern now repeats for xmm1
+ vpalignr xmm4,xmm2,xmm1,4
+ ror r13d,14
+ mov r8d,r14d
+ vpalignr xmm7,xmm0,xmm3,4
+ mov r12d,ebx
+ xor r13d,eax
+DB 143,232,120,194,236,14 ; vprotd xmm5,xmm4,14
+ ror r14d,9
+ xor r12d,ecx
+ vpsrld xmm4,xmm4,3
+ ror r13d,5
+ xor r14d,r8d
+ vpaddd xmm1,xmm1,xmm7
+ and r12d,eax
+ vaesenc xmm9,xmm9,xmm10 ; AES round 3
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+DB 143,232,120,194,245,11 ; vprotd xmm6,xmm5,11
+ ror r14d,11
+ xor r12d,ecx
+ vpxor xmm4,xmm4,xmm5
+ xor r15d,r9d
+ ror r13d,6
+ add edx,r12d
+ and esi,r15d
+DB 143,232,120,194,248,13 ; vprotd xmm7,xmm0,13
+ xor r14d,r8d
+ add edx,r13d
+ vpxor xmm4,xmm4,xmm6
+ xor esi,r9d
+ add r11d,edx
+ vpsrld xmm6,xmm0,10
+ ror r14d,2
+ add edx,esi
+ vpaddd xmm1,xmm1,xmm4
+ mov r13d,r11d
+ add r14d,edx
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r13d,14
+ mov edx,r14d
+ vpxor xmm7,xmm7,xmm6
+ mov r12d,eax
+ xor r13d,r11d
+ ror r14d,9
+ xor r12d,ebx
+ vpxor xmm7,xmm7,xmm5
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 4
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r13d,r11d
+ vpsrldq xmm7,xmm7,8
+ add ecx,DWORD[20+rsp]
+ mov esi,edx
+ ror r14d,11
+ xor r12d,ebx
+ vpaddd xmm1,xmm1,xmm7
+ xor esi,r8d
+ ror r13d,6
+ add ecx,r12d
+ and r15d,esi
+DB 143,232,120,194,249,13 ; vprotd xmm7,xmm1,13
+ xor r14d,edx
+ add ecx,r13d
+ vpsrld xmm6,xmm1,10
+ xor r15d,r8d
+ add r10d,ecx
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r14d,2
+ add ecx,r15d
+ vpxor xmm7,xmm7,xmm6
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ vpxor xmm7,xmm7,xmm5
+ mov r12d,r11d
+ xor r13d,r10d
+ ror r14d,9
+ xor r12d,eax
+ vpslldq xmm7,xmm7,8
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 5
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r13d,r10d
+ vpaddd xmm1,xmm1,xmm7
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ ror r14d,11
+ xor r12d,eax
+ vpaddd xmm6,xmm1,XMMWORD[32+rbp]
+ xor r15d,edx
+ ror r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ ror r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ ror r14d,9
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 6
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov esi,ebx
+ ror r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ ror r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ ror r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm6 ; second X+K slot refilled; the pattern repeats for xmm2
+ vpalignr xmm4,xmm3,xmm2,4
+ ror r13d,14
+ mov eax,r14d
+ vpalignr xmm7,xmm1,xmm0,4
+ mov r12d,r9d
+ xor r13d,r8d
+DB 143,232,120,194,236,14 ; vprotd xmm5,xmm4,14
+ ror r14d,9
+ xor r12d,r10d
+ vpsrld xmm4,xmm4,3
+ ror r13d,5
+ xor r14d,eax
+ vpaddd xmm2,xmm2,xmm7
+ and r12d,r8d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 7
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+DB 143,232,120,194,245,11 ; vprotd xmm6,xmm5,11
+ ror r14d,11
+ xor r12d,r10d
+ vpxor xmm4,xmm4,xmm5
+ xor r15d,ebx
+ ror r13d,6
+ add r11d,r12d
+ and esi,r15d
+DB 143,232,120,194,249,13 ; vprotd xmm7,xmm1,13
+ xor r14d,eax
+ add r11d,r13d
+ vpxor xmm4,xmm4,xmm6
+ xor esi,ebx
+ add edx,r11d
+ vpsrld xmm6,xmm1,10
+ ror r14d,2
+ add r11d,esi
+ vpaddd xmm2,xmm2,xmm4
+ mov r13d,edx
+ add r14d,r11d
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r13d,14
+ mov r11d,r14d
+ vpxor xmm7,xmm7,xmm6
+ mov r12d,r8d
+ xor r13d,edx
+ ror r14d,9
+ xor r12d,r9d
+ vpxor xmm7,xmm7,xmm5
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 8
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r13d,edx
+ vpsrldq xmm7,xmm7,8
+ add r10d,DWORD[36+rsp]
+ mov esi,r11d
+ ror r14d,11
+ xor r12d,r9d
+ vpaddd xmm2,xmm2,xmm7
+ xor esi,eax
+ ror r13d,6
+ add r10d,r12d
+ and r15d,esi
+DB 143,232,120,194,250,13 ; vprotd xmm7,xmm2,13
+ xor r14d,r11d
+ add r10d,r13d
+ vpsrld xmm6,xmm2,10
+ xor r15d,eax
+ add ecx,r10d
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r14d,2
+ add r10d,r15d
+ vpxor xmm7,xmm7,xmm6
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ vpxor xmm7,xmm7,xmm5
+ mov r12d,edx
+ xor r13d,ecx
+ ror r14d,9
+ xor r12d,r8d
+ vpslldq xmm7,xmm7,8
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 9
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r13d,ecx
+ vpaddd xmm2,xmm2,xmm7
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ ror r14d,11
+ xor r12d,r8d
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ xor r15d,r11d
+ ror r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ ror r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ ror r14d,9
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 10 is the last one
+ vaesenc xmm9,xmm9,xmm10 ; AES round 10 for the longer schedules
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov esi,r9d
+ ror r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ ror r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ ror r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[32+rsp],xmm6 ; third X+K slot refilled; the pattern repeats for xmm3
+ vpalignr xmm4,xmm0,xmm3,4
+ ror r13d,14
+ mov r8d,r14d
+ vpalignr xmm7,xmm2,xmm1,4
+ mov r12d,ebx
+ xor r13d,eax
+DB 143,232,120,194,236,14 ; vprotd xmm5,xmm4,14
+ ror r14d,9
+ xor r12d,ecx
+ vpsrld xmm4,xmm4,3
+ ror r13d,5
+ xor r14d,r8d
+ vpaddd xmm3,xmm3,xmm7
+ and r12d,eax
+ vpand xmm8,xmm11,xmm12 ; keep the first candidate only where its mask is set
+ vaesenc xmm9,xmm9,xmm10 ; AES round 11
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+DB 143,232,120,194,245,11 ; vprotd xmm6,xmm5,11
+ ror r14d,11
+ xor r12d,ecx
+ vpxor xmm4,xmm4,xmm5
+ xor r15d,r9d
+ ror r13d,6
+ add edx,r12d
+ and esi,r15d
+DB 143,232,120,194,250,13 ; vprotd xmm7,xmm2,13
+ xor r14d,r8d
+ add edx,r13d
+ vpxor xmm4,xmm4,xmm6
+ xor esi,r9d
+ add r11d,edx
+ vpsrld xmm6,xmm2,10
+ ror r14d,2
+ add edx,esi
+ vpaddd xmm3,xmm3,xmm4
+ mov r13d,r11d
+ add r14d,edx
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r13d,14
+ mov edx,r14d
+ vpxor xmm7,xmm7,xmm6
+ mov r12d,eax
+ xor r13d,r11d
+ ror r14d,9
+ xor r12d,ebx
+ vpxor xmm7,xmm7,xmm5
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 12 is the last one
+ vaesenc xmm9,xmm9,xmm10 ; AES round 12 for the longest schedule
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r13d,r11d
+ vpsrldq xmm7,xmm7,8
+ add ecx,DWORD[52+rsp]
+ mov esi,edx
+ ror r14d,11
+ xor r12d,ebx
+ vpaddd xmm3,xmm3,xmm7
+ xor esi,r8d
+ ror r13d,6
+ add ecx,r12d
+ and r15d,esi
+DB 143,232,120,194,251,13 ; vprotd xmm7,xmm3,13
+ xor r14d,edx
+ add ecx,r13d
+ vpsrld xmm6,xmm3,10
+ xor r15d,r8d
+ add r10d,ecx
+DB 143,232,120,194,239,2 ; vprotd xmm5,xmm7,2
+ ror r14d,2
+ add ecx,r15d
+ vpxor xmm7,xmm7,xmm6
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ vpxor xmm7,xmm7,xmm5
+ mov r12d,r11d
+ xor r13d,r10d
+ ror r14d,9
+ xor r12d,eax
+ vpslldq xmm7,xmm7,8
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vpand xmm11,xmm11,xmm13 ; keep the second candidate only where its mask is set
+ vaesenc xmm9,xmm9,xmm10 ; AES round 13
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r13d,r10d
+ vpaddd xmm3,xmm3,xmm7
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ ror r14d,11
+ xor r12d,eax
+ vpaddd xmm6,xmm3,XMMWORD[96+rbp]
+ xor r15d,edx
+ ror r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ ror r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ ror r14d,9
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpor xmm8,xmm8,xmm11 ; merge the first two masked candidates
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 14 is the last one
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi] ; round key 0 again, for the next AES block
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov esi,ebx
+ ror r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ ror r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ ror r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[48+rsp],xmm6 ; fourth X+K slot refilled
+ mov r12,QWORD[((64+0))+rsp] ; recover the input pointer
+ vpand xmm11,xmm11,xmm14 ; keep the third candidate only where its mask is set
+ mov r15,QWORD[((64+8))+rsp] ; output - input
+ vpor xmm8,xmm8,xmm11 ; xmm8 = ciphertext block, also the next chaining value
+ vmovdqu XMMWORD[r12*1+r15],xmm8 ; store the ciphertext block
+ lea r12,[16+r12] ; next 16-byte AES block
+ cmp BYTE[131+rbp],0 ; top byte of the first dword 128 bytes ahead: nonzero for the constant rows reached here, zero at the shuffle mask (K256+512)
+ jne NEAR $L$xop_00_47 ; three passes cover rounds 0-47
+ vmovdqu xmm9,XMMWORD[r12] ; rounds 48-63: no further schedule expansion; fourth 16-byte AES block of this 64-byte block
+ mov QWORD[((64+0))+rsp],r12 ; park the input pointer: r12d is round scratch from here on
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ xor r13d,r8d
+ ror r14d,9
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpxor xmm9,xmm9,xmm10 ; AES: xor in round key 0
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ ror r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ ror r13d,6
+ add r11d,r12d
+ and esi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ add edx,r11d
+ ror r14d,2
+ add r11d,esi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ ror r14d,9
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vpxor xmm9,xmm9,xmm8 ; AES-CBC: xor in the previous ciphertext block
+ xor r13d,edx
+ add r10d,DWORD[4+rsp]
+ mov esi,r11d
+ ror r14d,11
+ xor r12d,r9d
+ xor esi,eax
+ ror r13d,6
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ ror r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ xor r13d,ecx
+ ror r14d,9
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 1
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ ror r14d,11
+ xor r12d,r8d
+ xor r15d,r11d
+ ror r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ ror r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ ror r14d,9
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 2
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov esi,r9d
+ ror r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ ror r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ ror r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ xor r13d,eax
+ ror r14d,9
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vaesenc xmm9,xmm9,xmm10 ; AES round 3
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ ror r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ ror r13d,6
+ add edx,r12d
+ and esi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ add r11d,edx
+ ror r14d,2
+ add edx,esi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ ror r14d,9
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 4
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r13d,r11d
+ add ecx,DWORD[20+rsp]
+ mov esi,edx
+ ror r14d,11
+ xor r12d,ebx
+ xor esi,r8d
+ ror r13d,6
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ ror r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ xor r13d,r10d
+ ror r14d,9
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 5
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ ror r14d,11
+ xor r12d,eax
+ xor r15d,edx
+ ror r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ ror r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ ror r14d,9
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 6
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov esi,ebx
+ ror r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ ror r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ ror r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ xor r13d,r8d
+ ror r14d,9
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vaesenc xmm9,xmm9,xmm10 ; AES round 7
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ ror r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ ror r13d,6
+ add r11d,r12d
+ and esi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ add edx,r11d
+ ror r14d,2
+ add r11d,esi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ ror r14d,9
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 8
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r13d,edx
+ add r10d,DWORD[36+rsp]
+ mov esi,r11d
+ ror r14d,11
+ xor r12d,r9d
+ xor esi,eax
+ ror r13d,6
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ ror r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ xor r13d,ecx
+ ror r14d,9
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10 ; AES round 9
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ ror r14d,11
+ xor r12d,r8d
+ xor r15d,r11d
+ ror r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ ror r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ ror r14d,9
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 10 is the last one
+ vaesenc xmm9,xmm9,xmm10 ; AES round 10 for the longer schedules
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov esi,r9d
+ ror r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ ror r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ ror r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ xor r13d,eax
+ ror r14d,9
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpand xmm8,xmm11,xmm12 ; keep the first candidate only where its mask is set
+ vaesenc xmm9,xmm9,xmm10 ; AES round 11
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ ror r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ ror r13d,6
+ add edx,r12d
+ and esi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ add r11d,edx
+ ror r14d,2
+ add edx,esi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ ror r14d,9
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 12 is the last one
+ vaesenc xmm9,xmm9,xmm10 ; AES round 12 for the longest schedule
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r13d,r11d
+ add ecx,DWORD[52+rsp]
+ mov esi,edx
+ ror r14d,11
+ xor r12d,ebx
+ xor esi,r8d
+ ror r13d,6
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ ror r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ xor r13d,r10d
+ ror r14d,9
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vpand xmm11,xmm11,xmm13 ; keep the second candidate only where its mask is set
+ vaesenc xmm9,xmm9,xmm10 ; AES round 13
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ ror r14d,11
+ xor r12d,eax
+ xor r15d,edx
+ ror r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ ror r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ ror r14d,9
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpor xmm8,xmm8,xmm11 ; merge the first two masked candidates
+ vaesenclast xmm11,xmm9,xmm10 ; candidate result if round key 14 is the last one
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi] ; round key 0 again, for the next AES block
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov esi,ebx
+ ror r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ ror r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ ror r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax ; r14d = final value of a for this block, still pending
+ mov r12,QWORD[((64+0))+rsp] ; recover the input pointer
+ mov r13,QWORD[((64+8))+rsp] ; output - input
+ mov r15,QWORD[((64+40))+rsp] ; hash state pointer
+ mov rsi,QWORD[((64+48))+rsp] ; hashed data - input
+
+ vpand xmm11,xmm11,xmm14 ; keep the third candidate only where its mask is set
+ mov eax,r14d ; a = value left pending in r14d by the last round
+ vpor xmm8,xmm8,xmm11 ; xmm8 = ciphertext block, also the next chaining value
+ vmovdqu XMMWORD[r13*1+r12],xmm8 ; store the fourth ciphertext block of this 64-byte block
+ lea r12,[16+r12] ; advance the input pointer past the block
+
+ add eax,DWORD[r15] ; add the previous state to the working variables
+ add ebx,DWORD[4+r15]
+ add ecx,DWORD[8+r15]
+ add edx,DWORD[12+r15]
+ add r8d,DWORD[16+r15]
+ add r9d,DWORD[20+r15]
+ add r10d,DWORD[24+r15]
+ add r11d,DWORD[28+r15]
+
+ cmp r12,QWORD[((64+16))+rsp] ; end of input reached? The mov stores below leave the flags intact for the jb
+
+ mov DWORD[r15],eax ; write the updated state back
+ mov DWORD[4+r15],ebx
+ mov DWORD[8+r15],ecx
+ mov DWORD[12+r15],edx
+ mov DWORD[16+r15],r8d
+ mov DWORD[20+r15],r9d
+ mov DWORD[24+r15],r10d
+ mov DWORD[28+r15],r11d
+
+ jb NEAR $L$loop_xop ; more 64-byte blocks to process
+
+ mov r8,QWORD[((64+32))+rsp] ; chaining-value pointer
+ mov rsi,QWORD[120+rsp] ; entry stack pointer saved by the prologue
+
+ vmovdqu XMMWORD[r8],xmm8 ; hand the last ciphertext block back as the new chaining value
+ vzeroall ; clear all vector registers before the saved ones are restored
+ movaps xmm6,XMMWORD[128+rsp]
+ movaps xmm7,XMMWORD[144+rsp]
+ movaps xmm8,XMMWORD[160+rsp]
+ movaps xmm9,XMMWORD[176+rsp]
+ movaps xmm10,XMMWORD[192+rsp]
+ movaps xmm11,XMMWORD[208+rsp]
+ movaps xmm12,XMMWORD[224+rsp]
+ movaps xmm13,XMMWORD[240+rsp]
+ movaps xmm14,XMMWORD[256+rsp]
+ movaps xmm15,XMMWORD[272+rsp]
+ mov r15,QWORD[((-48))+rsi] ; the six pushed registers sit just below the entry stack pointer
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi] ; drop the frame and the pushes in one step
+
+$L$epilogue_xop:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi and rsi from the home slots written on entry
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_aesni_cbc_sha256_enc_xop:
+
+ALIGN 64
+aesni_cbc_sha256_enc_avx: ; AVX code path; entry sequence and frame layout match the XOP variant
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi and rsi are parked in the caller-provided home slots
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_sha256_enc_avx:
+ mov rdi,rcx ; Win64 argument registers rcx,rdx,r8,r9 are moved to rdi,rsi,rdx,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp] ; fifth argument, from the stack
+ mov r9,QWORD[48+rsp] ; sixth argument, from the stack
+
+
+
+$L$avx_shortcut:
+ mov r10,QWORD[56+rsp] ; seventh argument, from the stack
+ mov rax,rsp ; entry stack pointer, stored in the frame below
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ sub rsp,288 ; frame: 64 bytes of X+K scratch, 64 bytes of saved pointers, 160 bytes for xmm6-xmm15
+ and rsp,-64 ; align the frame to 64 bytes
+
+ shl rdx,6 ; third argument * 64: byte count
+ sub rsi,rdi ; rsi = second argument - first argument
+ sub r10,rdi ; r10 = seventh argument - first argument
+ add rdx,rdi ; rdx = first argument + byte count (end pointer)
+
+
+ mov QWORD[((64+8))+rsp],rsi ; frame+72: second argument - first argument
+ mov QWORD[((64+16))+rsp],rdx ; frame+80: end pointer
+
+ mov QWORD[((64+32))+rsp],r8 ; frame+96: fifth argument
+ mov QWORD[((64+40))+rsp],r9 ; frame+104: sixth argument
+ mov QWORD[((64+48))+rsp],r10 ; frame+112: seventh argument - first argument
+ mov QWORD[120+rsp],rax ; frame+120: entry stack pointer
+
+ movaps XMMWORD[128+rsp],xmm6 ; xmm6-xmm15 are saved before the vector code overwrites them
+ movaps XMMWORD[144+rsp],xmm7
+ movaps XMMWORD[160+rsp],xmm8
+ movaps XMMWORD[176+rsp],xmm9
+ movaps XMMWORD[192+rsp],xmm10
+ movaps XMMWORD[208+rsp],xmm11
+ movaps XMMWORD[224+rsp],xmm12
+ movaps XMMWORD[240+rsp],xmm13
+ movaps XMMWORD[256+rsp],xmm14
+ movaps XMMWORD[272+rsp],xmm15
+$L$prologue_avx:
+ vzeroall ; clear every vector register (xmm6-xmm15 are already saved in the frame)
+
+ mov r12,rdi ; r12 = running input pointer
+ lea rdi,[128+rcx] ; rdi = key schedule + 128; round key n is addressed as [n*16-128+rdi]
+ lea r13,[((K256+544))] ; select-mask area that follows the constants
+ mov r14d,DWORD[((240-128))+rdi] ; dword at key schedule + 240, turned into a mask index below
+ mov r15,r9 ; r15 = hash state pointer
+ mov rsi,r10 ; rsi = hashed data - input
+ vmovdqu xmm8,XMMWORD[r8] ; xmm8 = 16 bytes from the fifth argument (chaining value)
+ sub r14,9 ; mask index. NOTE(review): the aligned loads below need index*8 to be a multiple of 16, so the field presumably holds 9/11/13 - confirm against the key-setup code
+
+ mov eax,DWORD[r15] ; the eight state words a..h live in eax,ebx,ecx,edx,r8d,r9d,r10d,r11d
+ mov ebx,DWORD[4+r15]
+ mov ecx,DWORD[8+r15]
+ mov edx,DWORD[12+r15]
+ mov r8d,DWORD[16+r15]
+ mov r9d,DWORD[20+r15]
+ mov r10d,DWORD[24+r15]
+ mov r11d,DWORD[28+r15]
+
+ vmovdqa xmm14,XMMWORD[r14*8+r13] ; three consecutive 16-byte windows of the mask area
+ vmovdqa xmm13,XMMWORD[16+r14*8+r13]
+ vmovdqa xmm12,XMMWORD[32+r14*8+r13]
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi] ; round key 0
+ jmp NEAR $L$loop_avx
+ALIGN 16
+$L$loop_avx: ; top of the per-block loop: load 64 bytes of data to hash and seed rounds 0-15
+ vmovdqa xmm7,XMMWORD[((K256+512))] ; byte-reversal shuffle control
+ vmovdqu xmm0,XMMWORD[r12*1+rsi] ; 64 bytes of data to hash -> xmm0..xmm3 (message words X[0..15])
+ vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
+ vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
+ vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
+ vpshufb xmm0,xmm0,xmm7 ; reverse the bytes of every dword
+ lea rbp,[K256] ; rbp walks the round-constant table
+ vpshufb xmm1,xmm1,xmm7
+ vpshufb xmm2,xmm2,xmm7
+ vpaddd xmm4,xmm0,XMMWORD[rbp] ; X[0..3] + constants; rows of four constants are 32 bytes apart
+ vpshufb xmm3,xmm3,xmm7
+ vpaddd xmm5,xmm1,XMMWORD[32+rbp]
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ vpaddd xmm7,xmm3,XMMWORD[96+rbp]
+ vmovdqa XMMWORD[rsp],xmm4 ; X+K for rounds 0-15 is kept at [rsp] .. [rsp+63]
+ mov r14d,eax ; r14d carries the pending value of a into the first round
+ vmovdqa XMMWORD[16+rsp],xmm5
+ mov esi,ebx
+ vmovdqa XMMWORD[32+rsp],xmm6
+ xor esi,ecx ; esi = b^c, the first round's Maj input
+ vmovdqa XMMWORD[48+rsp],xmm7
+ mov r13d,r8d ; r13d = e, the first round's Sigma1 input
+ jmp NEAR $L$avx_00_47
+
+ALIGN 16
+$L$avx_00_47:
+ sub rbp,-16*2*4
+ vmovdqu xmm9,XMMWORD[r12]
+ mov QWORD[((64+0))+rsp],r12
+ vpalignr xmm4,xmm1,xmm0,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm3,xmm2,4
+ xor r13d,r8d
+ shrd r14d,r14d,9
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm0,xmm0,xmm7
+ vpxor xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ shrd r14d,r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ shrd r13d,r13d,6
+ add r11d,r12d
+ and esi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ vpshufd xmm7,xmm3,250
+ add edx,r11d
+ shrd r14d,r14d,2
+ add r11d,esi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ vpslld xmm5,xmm5,11
+ shrd r14d,r14d,9
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ vpxor xmm9,xmm9,xmm8
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[4+rsp]
+ mov esi,r11d
+ shrd r14d,r14d,11
+ vpxor xmm4,xmm4,xmm5
+ xor r12d,r9d
+ xor esi,eax
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ vpaddd xmm0,xmm0,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ vpxor xmm6,xmm6,xmm7
+ shrd r14d,r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ xor r13d,ecx
+ shrd r14d,r14d,9
+ vpshufd xmm6,xmm6,132
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpsrldq xmm6,xmm6,8
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ vpaddd xmm0,xmm0,xmm6
+ mov r15d,r10d
+ shrd r14d,r14d,11
+ xor r12d,r8d
+ vpshufd xmm7,xmm0,80
+ xor r15d,r11d
+ shrd r13d,r13d,6
+ add r9d,r12d
+ vpsrld xmm6,xmm7,10
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpsrlq xmm7,xmm7,17
+ xor esi,r11d
+ add ebx,r9d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ vpsrlq xmm7,xmm7,2
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpxor xmm6,xmm6,xmm7
+ xor r13d,ebx
+ shrd r14d,r14d,9
+ xor r12d,edx
+ vpshufd xmm6,xmm6,232
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpslldq xmm6,xmm6,8
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov esi,r9d
+ vpaddd xmm0,xmm0,xmm6
+ shrd r14d,r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ vpaddd xmm6,xmm0,XMMWORD[rbp]
+ shrd r13d,r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ shrd r14d,r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[rsp],xmm6
+ vpalignr xmm4,xmm2,xmm1,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm0,xmm3,4
+ xor r13d,eax
+ shrd r14d,r14d,9
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm1,xmm1,xmm7
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ shrd r14d,r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ shrd r13d,r13d,6
+ add edx,r12d
+ and esi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ vpshufd xmm7,xmm0,250
+ add r11d,edx
+ shrd r14d,r14d,2
+ add edx,esi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ vpslld xmm5,xmm5,11
+ shrd r14d,r14d,9
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[20+rsp]
+ mov esi,edx
+ shrd r14d,r14d,11
+ vpxor xmm4,xmm4,xmm5
+ xor r12d,ebx
+ xor esi,r8d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ vpaddd xmm1,xmm1,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ vpxor xmm6,xmm6,xmm7
+ shrd r14d,r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ xor r13d,r10d
+ shrd r14d,r14d,9
+ vpshufd xmm6,xmm6,132
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpsrldq xmm6,xmm6,8
+ and r12d,r10d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ vpaddd xmm1,xmm1,xmm6
+ mov r15d,ecx
+ shrd r14d,r14d,11
+ xor r12d,eax
+ vpshufd xmm7,xmm1,80
+ xor r15d,edx
+ shrd r13d,r13d,6
+ add ebx,r12d
+ vpsrld xmm6,xmm7,10
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpsrlq xmm7,xmm7,17
+ xor esi,edx
+ add r9d,ebx
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ vpsrlq xmm7,xmm7,2
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpxor xmm6,xmm6,xmm7
+ xor r13d,r9d
+ shrd r14d,r14d,9
+ xor r12d,r11d
+ vpshufd xmm6,xmm6,232
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpslldq xmm6,xmm6,8
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov esi,ebx
+ vpaddd xmm1,xmm1,xmm6
+ shrd r14d,r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ vpaddd xmm6,xmm1,XMMWORD[32+rbp]
+ shrd r13d,r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ shrd r14d,r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm6
+ vpalignr xmm4,xmm3,xmm2,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm1,xmm0,4
+ xor r13d,r8d
+ shrd r14d,r14d,9
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm2,xmm2,xmm7
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ shrd r14d,r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ shrd r13d,r13d,6
+ add r11d,r12d
+ and esi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ vpshufd xmm7,xmm1,250
+ add edx,r11d
+ shrd r14d,r14d,2
+ add r11d,esi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ vpslld xmm5,xmm5,11
+ shrd r14d,r14d,9
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[36+rsp]
+ mov esi,r11d
+ shrd r14d,r14d,11
+ vpxor xmm4,xmm4,xmm5
+ xor r12d,r9d
+ xor esi,eax
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ vpaddd xmm2,xmm2,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ vpxor xmm6,xmm6,xmm7
+ shrd r14d,r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ xor r13d,ecx
+ shrd r14d,r14d,9
+ vpshufd xmm6,xmm6,132
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpsrldq xmm6,xmm6,8
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ vpaddd xmm2,xmm2,xmm6
+ mov r15d,r10d
+ shrd r14d,r14d,11
+ xor r12d,r8d
+ vpshufd xmm7,xmm2,80
+ xor r15d,r11d
+ shrd r13d,r13d,6
+ add r9d,r12d
+ vpsrld xmm6,xmm7,10
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpsrlq xmm7,xmm7,17
+ xor esi,r11d
+ add ebx,r9d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ vpsrlq xmm7,xmm7,2
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpxor xmm6,xmm6,xmm7
+ xor r13d,ebx
+ shrd r14d,r14d,9
+ xor r12d,edx
+ vpshufd xmm6,xmm6,232
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpslldq xmm6,xmm6,8
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov esi,r9d
+ vpaddd xmm2,xmm2,xmm6
+ shrd r14d,r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ shrd r13d,r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ shrd r14d,r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[32+rsp],xmm6
+ vpalignr xmm4,xmm0,xmm3,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm2,xmm1,4
+ xor r13d,eax
+ shrd r14d,r14d,9
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm3,xmm3,xmm7
+ vpand xmm8,xmm11,xmm12
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ shrd r14d,r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ shrd r13d,r13d,6
+ add edx,r12d
+ and esi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ vpshufd xmm7,xmm2,250
+ add r11d,edx
+ shrd r14d,r14d,2
+ add edx,esi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ vpslld xmm5,xmm5,11
+ shrd r14d,r14d,9
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[52+rsp]
+ mov esi,edx
+ shrd r14d,r14d,11
+ vpxor xmm4,xmm4,xmm5
+ xor r12d,ebx
+ xor esi,r8d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ vpaddd xmm3,xmm3,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ vpxor xmm6,xmm6,xmm7
+ shrd r14d,r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ xor r13d,r10d
+ shrd r14d,r14d,9
+ vpshufd xmm6,xmm6,132
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpsrldq xmm6,xmm6,8
+ and r12d,r10d
+ vpand xmm11,xmm11,xmm13
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ vpaddd xmm3,xmm3,xmm6
+ mov r15d,ecx
+ shrd r14d,r14d,11
+ xor r12d,eax
+ vpshufd xmm7,xmm3,80
+ xor r15d,edx
+ shrd r13d,r13d,6
+ add ebx,r12d
+ vpsrld xmm6,xmm7,10
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpsrlq xmm7,xmm7,17
+ xor esi,edx
+ add r9d,ebx
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ vpsrlq xmm7,xmm7,2
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpxor xmm6,xmm6,xmm7
+ xor r13d,r9d
+ shrd r14d,r14d,9
+ xor r12d,r11d
+ vpshufd xmm6,xmm6,232
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpslldq xmm6,xmm6,8
+ vpor xmm8,xmm8,xmm11
+ vaesenclast xmm11,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov esi,ebx
+ vpaddd xmm3,xmm3,xmm6
+ shrd r14d,r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ vpaddd xmm6,xmm3,XMMWORD[96+rbp]
+ shrd r13d,r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ shrd r14d,r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[48+rsp],xmm6
+ mov r12,QWORD[((64+0))+rsp]
+ vpand xmm11,xmm11,xmm14
+ mov r15,QWORD[((64+8))+rsp]
+ vpor xmm8,xmm8,xmm11
+ vmovdqu XMMWORD[r12*1+r15],xmm8
+ lea r12,[16+r12]
+ cmp BYTE[131+rbp],0
+ jne NEAR $L$avx_00_47
+ vmovdqu xmm9,XMMWORD[r12]
+ mov QWORD[((64+0))+rsp],r12
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ xor r13d,r8d
+ shrd r14d,r14d,9
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpxor xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ shrd r14d,r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ shrd r13d,r13d,6
+ add r11d,r12d
+ and esi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ add edx,r11d
+ shrd r14d,r14d,2
+ add r11d,esi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ shrd r14d,r14d,9
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vpxor xmm9,xmm9,xmm8
+ xor r13d,edx
+ add r10d,DWORD[4+rsp]
+ mov esi,r11d
+ shrd r14d,r14d,11
+ xor r12d,r9d
+ xor esi,eax
+ shrd r13d,r13d,6
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ shrd r14d,r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ xor r13d,ecx
+ shrd r14d,r14d,9
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ shrd r14d,r14d,11
+ xor r12d,r8d
+ xor r15d,r11d
+ shrd r13d,r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ shrd r14d,r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ shrd r14d,r14d,9
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov esi,r9d
+ shrd r14d,r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ shrd r13d,r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ shrd r14d,r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ xor r13d,eax
+ shrd r14d,r14d,9
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ shrd r14d,r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ shrd r13d,r13d,6
+ add edx,r12d
+ and esi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ add r11d,edx
+ shrd r14d,r14d,2
+ add edx,esi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ shrd r14d,r14d,9
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r13d,r11d
+ add ecx,DWORD[20+rsp]
+ mov esi,edx
+ shrd r14d,r14d,11
+ xor r12d,ebx
+ xor esi,r8d
+ shrd r13d,r13d,6
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ shrd r14d,r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ xor r13d,r10d
+ shrd r14d,r14d,9
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ shrd r14d,r14d,11
+ xor r12d,eax
+ xor r15d,edx
+ shrd r13d,r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ shrd r14d,r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ shrd r14d,r14d,9
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov esi,ebx
+ shrd r14d,r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ shrd r13d,r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ shrd r14d,r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ xor r13d,r8d
+ shrd r14d,r14d,9
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ shrd r14d,r14d,11
+ xor r12d,r10d
+ xor r15d,ebx
+ shrd r13d,r13d,6
+ add r11d,r12d
+ and esi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor esi,ebx
+ add edx,r11d
+ shrd r14d,r14d,2
+ add r11d,esi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ xor r13d,edx
+ shrd r14d,r14d,9
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r13d,edx
+ add r10d,DWORD[36+rsp]
+ mov esi,r11d
+ shrd r14d,r14d,11
+ xor r12d,r9d
+ xor esi,eax
+ shrd r13d,r13d,6
+ add r10d,r12d
+ and r15d,esi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ add ecx,r10d
+ shrd r14d,r14d,2
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ xor r13d,ecx
+ shrd r14d,r14d,9
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ shrd r14d,r14d,11
+ xor r12d,r8d
+ xor r15d,r11d
+ shrd r13d,r13d,6
+ add r9d,r12d
+ and esi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor esi,r11d
+ add ebx,r9d
+ shrd r14d,r14d,2
+ add r9d,esi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ xor r13d,ebx
+ shrd r14d,r14d,9
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov esi,r9d
+ shrd r14d,r14d,11
+ xor r12d,edx
+ xor esi,r10d
+ shrd r13d,r13d,6
+ add r8d,r12d
+ and r15d,esi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ add eax,r8d
+ shrd r14d,r14d,2
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ xor r13d,eax
+ shrd r14d,r14d,9
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpand xmm8,xmm11,xmm12
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ shrd r14d,r14d,11
+ xor r12d,ecx
+ xor r15d,r9d
+ shrd r13d,r13d,6
+ add edx,r12d
+ and esi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor esi,r9d
+ add r11d,edx
+ shrd r14d,r14d,2
+ add edx,esi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ xor r13d,r11d
+ shrd r14d,r14d,9
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r13d,r11d
+ add ecx,DWORD[52+rsp]
+ mov esi,edx
+ shrd r14d,r14d,11
+ xor r12d,ebx
+ xor esi,r8d
+ shrd r13d,r13d,6
+ add ecx,r12d
+ and r15d,esi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ add r10d,ecx
+ shrd r14d,r14d,2
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ xor r13d,r10d
+ shrd r14d,r14d,9
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ vpand xmm11,xmm11,xmm13
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ shrd r14d,r14d,11
+ xor r12d,eax
+ xor r15d,edx
+ shrd r13d,r13d,6
+ add ebx,r12d
+ and esi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor esi,edx
+ add r9d,ebx
+ shrd r14d,r14d,2
+ add ebx,esi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ xor r13d,r9d
+ shrd r14d,r14d,9
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpor xmm8,xmm8,xmm11
+ vaesenclast xmm11,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi]
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov esi,ebx
+ shrd r14d,r14d,11
+ xor r12d,r11d
+ xor esi,ecx
+ shrd r13d,r13d,6
+ add eax,r12d
+ and r15d,esi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ add r8d,eax
+ shrd r14d,r14d,2
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ mov r12,QWORD[((64+0))+rsp]
+ mov r13,QWORD[((64+8))+rsp]
+ mov r15,QWORD[((64+40))+rsp]
+ mov rsi,QWORD[((64+48))+rsp]
+
+ vpand xmm11,xmm11,xmm14
+ mov eax,r14d
+ vpor xmm8,xmm8,xmm11
+ vmovdqu XMMWORD[r13*1+r12],xmm8
+ lea r12,[16+r12]
+
+ add eax,DWORD[r15]
+ add ebx,DWORD[4+r15]
+ add ecx,DWORD[8+r15]
+ add edx,DWORD[12+r15]
+ add r8d,DWORD[16+r15]
+ add r9d,DWORD[20+r15]
+ add r10d,DWORD[24+r15]
+ add r11d,DWORD[28+r15]
+
+ cmp r12,QWORD[((64+16))+rsp]
+
+ mov DWORD[r15],eax
+ mov DWORD[4+r15],ebx
+ mov DWORD[8+r15],ecx
+ mov DWORD[12+r15],edx
+ mov DWORD[16+r15],r8d
+ mov DWORD[20+r15],r9d
+ mov DWORD[24+r15],r10d
+ mov DWORD[28+r15],r11d
+ jb NEAR $L$loop_avx
+
+ mov r8,QWORD[((64+32))+rsp]
+ mov rsi,QWORD[120+rsp]
+
+ vmovdqu XMMWORD[r8],xmm8
+ vzeroall
+ movaps xmm6,XMMWORD[128+rsp]
+ movaps xmm7,XMMWORD[144+rsp]
+ movaps xmm8,XMMWORD[160+rsp]
+ movaps xmm9,XMMWORD[176+rsp]
+ movaps xmm10,XMMWORD[192+rsp]
+ movaps xmm11,XMMWORD[208+rsp]
+ movaps xmm12,XMMWORD[224+rsp]
+ movaps xmm13,XMMWORD[240+rsp]
+ movaps xmm14,XMMWORD[256+rsp]
+ movaps xmm15,XMMWORD[272+rsp]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue_avx: ;final exit of the AVX path; rsp was just reloaded (lea rsp,[rsi]) from the value kept at 120+rsp
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi, which is callee-saved under the Microsoft x64 ABI
+ mov rsi,QWORD[16+rsp] ;reload rsi (also callee-saved on Win64); presumably these are the slots the prologue stored to, as the AVX2 entry below does
+ DB 0F3h,0C3h ;repret: the two bytes F3 C3, i.e. a hand-encoded "rep ret"
+
+$L$SEH_end_aesni_cbc_sha256_enc_avx:
+
+ALIGN 64
+aesni_cbc_sha256_enc_avx2: ;AVX2 variant entry: arrives with Microsoft x64 argument registers and remaps them below
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64, so park them in the caller-provided shadow space
+ mov QWORD[16+rsp],rsi ;second shadow-space slot holds rsi
+ mov rax,rsp ;rax = rsp at entry (set again at $L$avx2_shortcut)
+$L$SEH_begin_aesni_cbc_sha256_enc_avx2:
+ mov rdi,rcx ;arg1: rcx to rdi
+ mov rsi,rdx ;arg2: rdx to rsi
+ mov rdx,r8 ;arg3: r8 to rdx
+ mov rcx,r9 ;arg4: r9 to rcx
+ mov r8,QWORD[40+rsp] ;arg5: first stack argument (8-byte return address + 32-byte shadow space = 40)
+ mov r9,QWORD[48+rsp] ;arg6: next stack slot
+
+
+
+$L$avx2_shortcut: ;frame setup: save callee-saved registers, carve an aligned frame, stash pointers and xmm6-xmm15
+ mov r10,QWORD[56+rsp] ;arg7: stack slot following arg6 at 48+rsp
+ mov rax,rsp ;rax = rsp before the pushes; stored at 120+rsp below
+
+ push rbx ;callee-saved GPRs; they land at rax-8 (rbx) down to rax-48 (r15)
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ sub rsp,736 ;reserve room, then ...
+ and rsp,-256*4 ;... round down to a 1024-byte boundary, then ...
+ add rsp,448 ;... bias so rsp = 448 mod 1024 (a multiple of 64); net effect leaves at least 736-448 = 288 bytes above the new rsp
+
+ shl rdx,6 ;rdx *= 64 (presumably a count of 64-byte blocks - confirm against caller)
+ sub rsi,rdi ;rsi becomes an offset relative to rdi
+ sub r10,rdi ;r10 likewise becomes an offset relative to rdi
+ add rdx,rdi ;rdx = rdi + 64*count, compared against the running pointer later
+
+
+
+ mov QWORD[((64+16))+rsp],rdx ;frame slot 80: end pointer
+
+ mov QWORD[((64+32))+rsp],r8 ;frame slot 96: arg5
+ mov QWORD[((64+40))+rsp],r9 ;frame slot 104: arg6
+ mov QWORD[((64+48))+rsp],r10 ;frame slot 112: arg7 as an offset from arg1
+ mov QWORD[120+rsp],rax ;frame slot 120: rsp as it was before the pushes
+
+ movaps XMMWORD[128+rsp],xmm6 ;xmm6-xmm15 are callee-saved on Win64; movaps needs 16-byte alignment, which the rsp computation above provides
+ movaps XMMWORD[144+rsp],xmm7
+ movaps XMMWORD[160+rsp],xmm8
+ movaps XMMWORD[176+rsp],xmm9
+ movaps XMMWORD[192+rsp],xmm10
+ movaps XMMWORD[208+rsp],xmm11
+ movaps XMMWORD[224+rsp],xmm12
+ movaps XMMWORD[240+rsp],xmm13
+ movaps XMMWORD[256+rsp],xmm14
+ movaps XMMWORD[272+rsp],xmm15 ;last save ends at 288+rsp, exactly the space guaranteed above
+$L$prologue_avx2: ;one-time register setup before the main loop
+ vzeroall ;clear every ymm register (xmm6-xmm15 were saved to the frame just above)
+
+ mov r13,rdi ;r13 = running pointer, starts at arg1
+ vpinsrq xmm15,xmm15,rsi,1 ;keep rsi in the high qword of xmm15 (read back with vpextrq in the round code) because rsi is reused below
+ lea rdi,[128+rcx] ;rdi = rcx+128, so [(n-128)+rdi] addresses byte n of the block rcx points at
+ lea r12,[((K256+544))] ;r12 = table located 544 bytes into K256
+ mov r14d,DWORD[((240-128))+rdi] ;r14d = dword at rcx+240 (presumably the AES round count - confirm against the key layout)
+ mov r15,r9 ;r15 = arg6 pointer; eight dwords are loaded from it below
+ mov rsi,r10 ;rsi = arg7 offset
+ vmovdqu xmm8,XMMWORD[r8] ;xmm8 = 16 bytes at arg5; the round code XORs xmm8 into each block it loads
+ lea r14,[((-9))+r14] ;r14 -= 9 to form the table index
+
+ vmovdqa xmm14,XMMWORD[r14*8+r12] ;three consecutive 16-byte entries selected by r14; vmovdqa needs 16-byte alignment, so r14 is expected to be even here
+ vmovdqa xmm13,XMMWORD[16+r14*8+r12] ;xmm12/xmm13/xmm14 are later ANDed with the three vaesenclast results
+ vmovdqa xmm12,XMMWORD[32+r14*8+r12]
+
+ sub r13,-16*4 ;r13 += 64
+ mov eax,DWORD[r15] ;load eight 32-bit working variables from [r15] into eax,ebx,ecx,edx,r8d-r11d
+ lea r12,[r13*1+rsi] ;r12 = source for the upper 128-bit lanes
+ mov ebx,DWORD[4+r15]
+ cmp r13,rdx ;does the advanced pointer equal the end pointer?
+ mov ecx,DWORD[8+r15] ;mov leaves flags untouched, so the cmove below still sees the cmp
+ cmove r12,rsp ;if equal, read the upper-lane data from the stack frame instead (presumably to avoid reading past the input)
+ mov edx,DWORD[12+r15] ;rdx is overwritten only after the compare has been done
+ mov r8d,DWORD[16+r15]
+ mov r9d,DWORD[20+r15]
+ mov r10d,DWORD[24+r15]
+ mov r11d,DWORD[28+r15]
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi] ;xmm10 = first 16 bytes of the block originally in rcx (first key for the vpxor in the round code)
+ jmp NEAR $L$oop_avx2
+ALIGN 16
+$L$oop_avx2: ;per-iteration setup: load 64 bytes per 128-bit lane, shuffle bytes, add the first K256 rows and spill the sums to the stack
+ vmovdqa ymm7,YMMWORD[((K256+512))] ;ymm7 = vpshufb control mask stored 512 bytes into K256
+ vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi] ;lower lanes: 64 bytes at rsi+r13-64
+ vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]
+ vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]
+ vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]
+
+ vinserti128 ymm0,ymm0,XMMWORD[r12],1 ;upper lanes: 64 bytes at r12
+ vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
+ vpshufb ymm0,ymm0,ymm7 ;reorder bytes per the mask (presumably a per-dword byte swap - confirm against the table)
+ vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
+ vpshufb ymm1,ymm1,ymm7
+ vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
+
+ lea rbp,[K256] ;rbp = base of the constant table
+ vpshufb ymm2,ymm2,ymm7
+ lea r13,[((-64))+r13] ;r13 -= 64; the round code loads 16-byte blocks from [r13]
+ vpaddd ymm4,ymm0,YMMWORD[rbp] ;add table constants; each K256 row is stored twice, so both lanes get the same four values
+ vpshufb ymm3,ymm3,ymm7
+ vpaddd ymm5,ymm1,YMMWORD[32+rbp]
+ vpaddd ymm6,ymm2,YMMWORD[64+rbp]
+ vpaddd ymm7,ymm3,YMMWORD[96+rbp] ;the mask in ymm7 is no longer needed and is overwritten
+ vmovdqa YMMWORD[rsp],ymm4 ;aligned 32-byte spill, so rsp must be 32-byte aligned here
+ xor r14d,r14d ;r14d = 0; the first round adds r14 into eax
+ vmovdqa YMMWORD[32+rsp],ymm5
+ lea rsp,[((-64))+rsp] ;move the frame down 64 bytes for the next two spills
+ mov esi,ebx ;rsi's pointer role ends here; esi becomes a round temporary
+ vmovdqa YMMWORD[rsp],ymm6
+ xor esi,ecx ;esi = ebx ^ ecx, consumed by "and esi,r15d" in the first round
+ vmovdqa YMMWORD[32+rsp],ymm7
+ mov r12d,r9d ;r12d = r9d, ANDed with r8d at the start of the first round
+ sub rbp,-16*2*4 ;rbp += 128 (written as sub of -128, which fits a sign-extended 8-bit immediate)
+ jmp NEAR $L$avx2_00_47
+
+ALIGN 16
+$L$avx2_00_47:
+ vmovdqu xmm9,XMMWORD[r13]
+ vpinsrq xmm15,xmm15,r13,0
+ lea rsp,[((-64))+rsp]
+ vpalignr ymm4,ymm1,ymm0,4
+ add r11d,DWORD[((0+128))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ vpalignr ymm7,ymm3,ymm2,4
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ vpsrld ymm6,ymm4,7
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ vpaddd ymm0,ymm0,ymm7
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ vpsrld ymm7,ymm4,3
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ vpslld ymm5,ymm4,14
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ vpxor ymm4,ymm7,ymm6
+ and esi,r15d
+ vpxor xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ vpshufd ymm7,ymm3,250
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ vpsrld ymm6,ymm6,11
+ add r10d,DWORD[((4+128))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ vpxor ymm4,ymm4,ymm5
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ vpslld ymm5,ymm5,11
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ vpxor ymm4,ymm4,ymm6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ vpsrld ymm6,ymm7,10
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ vpsrlq ymm7,ymm7,17
+ and r15d,esi
+ vpxor xmm9,xmm9,xmm8
+ xor r14d,r12d
+ xor r15d,eax
+ vpaddd ymm0,ymm0,ymm4
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ vpxor ymm6,ymm6,ymm7
+ add r9d,DWORD[((8+128))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ vpshufd ymm6,ymm6,132
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ vpsrldq ymm6,ymm6,8
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ vpaddd ymm0,ymm0,ymm6
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ vpshufd ymm7,ymm0,80
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ vpsrld ymm6,ymm7,10
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ vpsrlq ymm7,ymm7,17
+ add r8d,DWORD[((12+128))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ vpxor ymm6,ymm6,ymm7
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ vpsrlq ymm7,ymm7,2
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ vpxor ymm6,ymm6,ymm7
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ vpshufd ymm6,ymm6,232
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ vpslldq ymm6,ymm6,8
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ vpaddd ymm0,ymm0,ymm6
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ vpaddd ymm6,ymm0,YMMWORD[rbp]
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ vmovdqa YMMWORD[rsp],ymm6
+ vpalignr ymm4,ymm2,ymm1,4
+ add edx,DWORD[((32+128))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ vpalignr ymm7,ymm0,ymm3,4
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ vpsrld ymm6,ymm4,7
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ vpaddd ymm1,ymm1,ymm7
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ vpsrld ymm7,ymm4,3
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ vpslld ymm5,ymm4,14
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ vpxor ymm4,ymm7,ymm6
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ vpshufd ymm7,ymm0,250
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ vpsrld ymm6,ymm6,11
+ add ecx,DWORD[((36+128))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ vpxor ymm4,ymm4,ymm5
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ vpslld ymm5,ymm5,11
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ vpxor ymm4,ymm4,ymm6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ vpsrld ymm6,ymm7,10
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ vpsrlq ymm7,ymm7,17
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ vpaddd ymm1,ymm1,ymm4
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ vpxor ymm6,ymm6,ymm7
+ add ebx,DWORD[((40+128))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ vpshufd ymm6,ymm6,132
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ vpsrldq ymm6,ymm6,8
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ vpaddd ymm1,ymm1,ymm6
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ vpshufd ymm7,ymm1,80
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ vpsrld ymm6,ymm7,10
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ vpsrlq ymm7,ymm7,17
+ add eax,DWORD[((44+128))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ vpxor ymm6,ymm6,ymm7
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ vpsrlq ymm7,ymm7,2
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ vpxor ymm6,ymm6,ymm7
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ vpshufd ymm6,ymm6,232
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ vpslldq ymm6,ymm6,8
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ vpaddd ymm1,ymm1,ymm6
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ vpaddd ymm6,ymm1,YMMWORD[32+rbp]
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vmovdqa YMMWORD[32+rsp],ymm6
+ lea rsp,[((-64))+rsp]
+ vpalignr ymm4,ymm3,ymm2,4
+ add r11d,DWORD[((0+128))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ vpalignr ymm7,ymm1,ymm0,4
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ vpsrld ymm6,ymm4,7
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ vpaddd ymm2,ymm2,ymm7
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ vpsrld ymm7,ymm4,3
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ vpslld ymm5,ymm4,14
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ vpxor ymm4,ymm7,ymm6
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ vpshufd ymm7,ymm1,250
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ vpsrld ymm6,ymm6,11
+ add r10d,DWORD[((4+128))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ vpxor ymm4,ymm4,ymm5
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ vpslld ymm5,ymm5,11
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ vpxor ymm4,ymm4,ymm6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ vpsrld ymm6,ymm7,10
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ vpsrlq ymm7,ymm7,17
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r14d,r12d
+ xor r15d,eax
+ vpaddd ymm2,ymm2,ymm4
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ vpxor ymm6,ymm6,ymm7
+ add r9d,DWORD[((8+128))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ vpshufd ymm6,ymm6,132
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ vpsrldq ymm6,ymm6,8
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ vpaddd ymm2,ymm2,ymm6
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ vpshufd ymm7,ymm2,80
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ vpsrld ymm6,ymm7,10
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ vpsrlq ymm7,ymm7,17
+ add r8d,DWORD[((12+128))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ vpxor ymm6,ymm6,ymm7
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ vpsrlq ymm7,ymm7,2
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ vpxor ymm6,ymm6,ymm7
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ vpshufd ymm6,ymm6,232
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ vpslldq ymm6,ymm6,8
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ vpaddd ymm2,ymm2,ymm6
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ vpaddd ymm6,ymm2,YMMWORD[64+rbp]
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ vmovdqa YMMWORD[rsp],ymm6
+ vpalignr ymm4,ymm0,ymm3,4
+ add edx,DWORD[((32+128))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ vpalignr ymm7,ymm2,ymm1,4
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ vpsrld ymm6,ymm4,7
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ vpaddd ymm3,ymm3,ymm7
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ vpsrld ymm7,ymm4,3
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ vpslld ymm5,ymm4,14
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ vpxor ymm4,ymm7,ymm6
+ and esi,r15d
+ vpand xmm8,xmm11,xmm12
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ vpshufd ymm7,ymm2,250
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ vpsrld ymm6,ymm6,11
+ add ecx,DWORD[((36+128))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ vpxor ymm4,ymm4,ymm5
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ vpslld ymm5,ymm5,11
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ vpxor ymm4,ymm4,ymm6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ vpsrld ymm6,ymm7,10
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ vpsrlq ymm7,ymm7,17
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ vpaddd ymm3,ymm3,ymm4
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ vpxor ymm6,ymm6,ymm7
+ add ebx,DWORD[((40+128))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ vpshufd ymm6,ymm6,132
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ vpsrldq ymm6,ymm6,8
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ vpaddd ymm3,ymm3,ymm6
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ vpshufd ymm7,ymm3,80
+ and esi,r15d
+ vpand xmm11,xmm11,xmm13
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ vpsrld ymm6,ymm7,10
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ vpsrlq ymm7,ymm7,17
+ add eax,DWORD[((44+128))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ vpxor ymm6,ymm6,ymm7
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ vpsrlq ymm7,ymm7,2
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ vpxor ymm6,ymm6,ymm7
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ vpshufd ymm6,ymm6,232
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ vpslldq ymm6,ymm6,8
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ vpaddd ymm3,ymm3,ymm6
+ and r15d,esi
+ vpor xmm8,xmm8,xmm11
+ vaesenclast xmm11,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ vpaddd ymm6,ymm3,YMMWORD[96+rbp]
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vmovdqa YMMWORD[32+rsp],ymm6
+ vmovq r13,xmm15
+ vpextrq r15,xmm15,1
+ vpand xmm11,xmm11,xmm14
+ vpor xmm8,xmm8,xmm11
+ vmovdqu XMMWORD[r13*1+r15],xmm8
+ lea r13,[16+r13]
+ lea rbp,[128+rbp]
+ cmp BYTE[3+rbp],0
+ jne NEAR $L$avx2_00_47
+ vmovdqu xmm9,XMMWORD[r13]
+ vpinsrq xmm15,xmm15,r13,0
+ add r11d,DWORD[((0+64))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and esi,r15d
+ vpxor xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[((4+64))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,esi
+ vpxor xmm9,xmm9,xmm8
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[((8+64))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[((12+64))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[((32+64))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[((36+64))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[((40+64))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[((44+64))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ add r11d,DWORD[rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[4+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[8+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[12+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[32+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and esi,r15d
+ vpand xmm8,xmm11,xmm12
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[36+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[40+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and esi,r15d
+ vpand xmm11,xmm11,xmm13
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[44+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,esi
+ vpor xmm8,xmm8,xmm11
+ vaesenclast xmm11,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vpextrq r12,xmm15,1
+ vmovq r13,xmm15
+ mov r15,QWORD[552+rsp]
+ add eax,r14d
+ lea rbp,[448+rsp]
+
+ vpand xmm11,xmm11,xmm14
+ vpor xmm8,xmm8,xmm11
+ vmovdqu XMMWORD[r13*1+r12],xmm8
+ lea r13,[16+r13]
+
+ add eax,DWORD[r15]
+ add ebx,DWORD[4+r15]
+ add ecx,DWORD[8+r15]
+ add edx,DWORD[12+r15]
+ add r8d,DWORD[16+r15]
+ add r9d,DWORD[20+r15]
+ add r10d,DWORD[24+r15]
+ add r11d,DWORD[28+r15]
+
+ mov DWORD[r15],eax
+ mov DWORD[4+r15],ebx
+ mov DWORD[8+r15],ecx
+ mov DWORD[12+r15],edx
+ mov DWORD[16+r15],r8d
+ mov DWORD[20+r15],r9d
+ mov DWORD[24+r15],r10d
+ mov DWORD[28+r15],r11d
+
+ cmp r13,QWORD[80+rbp]
+ je NEAR $L$done_avx2
+
+ xor r14d,r14d
+ mov esi,ebx
+ mov r12d,r9d
+ xor esi,ecx
+ jmp NEAR $L$ower_avx2
+ALIGN 16
+$L$ower_avx2:
+ vmovdqu xmm9,XMMWORD[r13]
+ vpinsrq xmm15,xmm15,r13,0
+ add r11d,DWORD[((0+16))+rbp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and esi,r15d
+ vpxor xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((16-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[((4+16))+rbp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,esi
+ vpxor xmm9,xmm9,xmm8
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[((8+16))+rbp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((32-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[((12+16))+rbp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((48-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[((32+16))+rbp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((64-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[((36+16))+rbp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((80-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[((40+16))+rbp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((96-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[((44+16))+rbp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((112-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ lea rbp,[((-64))+rbp]
+ add r11d,DWORD[((0+16))+rbp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((128-128))+rdi]
+ xor r14d,r12d
+ xor esi,ebx
+ xor r14d,r13d
+ lea r11d,[rsi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[((4+16))+rbp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx esi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,esi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov esi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor esi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,esi
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((144-128))+rdi]
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[((8+16))+rbp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and esi,r15d
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((160-128))+rdi]
+ xor r14d,r12d
+ xor esi,r11d
+ xor r14d,r13d
+ lea r9d,[rsi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[((12+16))+rbp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx esi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,esi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov esi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor esi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((176-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[((32+16))+rbp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and esi,r15d
+ vpand xmm8,xmm11,xmm12
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((192-128))+rdi]
+ xor r14d,r12d
+ xor esi,r9d
+ xor r14d,r13d
+ lea edx,[rsi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[((36+16))+rbp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx esi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,esi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov esi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor esi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,esi
+ vaesenclast xmm11,xmm9,xmm10
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((208-128))+rdi]
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[((40+16))+rbp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and esi,r15d
+ vpand xmm11,xmm11,xmm13
+ vaesenc xmm9,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((224-128))+rdi]
+ xor r14d,r12d
+ xor esi,edx
+ xor r14d,r13d
+ lea ebx,[rsi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[((44+16))+rbp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx esi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,esi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov esi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor esi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,esi
+ vpor xmm8,xmm8,xmm11
+ vaesenclast xmm11,xmm9,xmm10
+ vmovdqu xmm10,XMMWORD[((0-128))+rdi]
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vmovq r13,xmm15
+ vpextrq r15,xmm15,1
+ vpand xmm11,xmm11,xmm14
+ vpor xmm8,xmm8,xmm11
+ lea rbp,[((-64))+rbp]
+ vmovdqu XMMWORD[r13*1+r15],xmm8
+ lea r13,[16+r13]
+ cmp rbp,rsp
+ jae NEAR $L$ower_avx2
+
+ mov r15,QWORD[552+rsp]
+ lea r13,[64+r13]
+ mov rsi,QWORD[560+rsp]
+ add eax,r14d
+ lea rsp,[448+rsp]
+
+ add eax,DWORD[r15]
+ add ebx,DWORD[4+r15]
+ add ecx,DWORD[8+r15]
+ add edx,DWORD[12+r15]
+ add r8d,DWORD[16+r15]
+ add r9d,DWORD[20+r15]
+ add r10d,DWORD[24+r15]
+ lea r12,[r13*1+rsi]
+ add r11d,DWORD[28+r15]
+
+ cmp r13,QWORD[((64+16))+rsp]
+
+ mov DWORD[r15],eax
+ cmove r12,rsp
+ mov DWORD[4+r15],ebx
+ mov DWORD[8+r15],ecx
+ mov DWORD[12+r15],edx
+ mov DWORD[16+r15],r8d
+ mov DWORD[20+r15],r9d
+ mov DWORD[24+r15],r10d
+ mov DWORD[28+r15],r11d
+
+ jbe NEAR $L$oop_avx2
+ lea rbp,[rsp]
+
+
+
+
+$L$done_avx2:
+ mov r8,QWORD[((64+32))+rbp]
+ mov rsi,QWORD[((64+56))+rbp]
+
+ vmovdqu XMMWORD[r8],xmm8
+ vzeroall
+ movaps xmm6,XMMWORD[128+rbp]
+ movaps xmm7,XMMWORD[144+rbp]
+ movaps xmm8,XMMWORD[160+rbp]
+ movaps xmm9,XMMWORD[176+rbp]
+ movaps xmm10,XMMWORD[192+rbp]
+ movaps xmm11,XMMWORD[208+rbp]
+ movaps xmm12,XMMWORD[224+rbp]
+ movaps xmm13,XMMWORD[240+rbp]
+ movaps xmm14,XMMWORD[256+rbp]
+ movaps xmm15,XMMWORD[272+rbp]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue_avx2:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_aesni_cbc_sha256_enc_avx2:
+
+ALIGN 32
+aesni_cbc_sha256_enc_shaext: ; Stitched AES-CBC encrypt + SHA-256 using AES-NI and the SHA extensions: each loop pass encrypts four 16-byte blocks and hashes one 64-byte block.
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64, park them in the caller's home slots
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp ; rax = entry rsp; the XMM save area below is addressed relative to it
+$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
+ mov rdi,rcx ; arg1 -> rdi: AES input pointer (read at [rdi+0/16/32/48])
+ mov rsi,rdx ; arg2 -> rsi: AES output pointer (turned into out-in delta below)
+ mov rdx,r8 ; arg3 -> rdx: loop count, decremented once per 64 bytes
+ mov rcx,r9 ; arg4 -> rcx: AES round-key schedule
+ mov r8,QWORD[40+rsp] ; arg5 (stack) -> r8: 16-byte CBC chaining value, read at entry and written back at exit
+ mov r9,QWORD[48+rsp] ; arg6 (stack) -> r9: 32-byte SHA-256 state, read at entry and written back at exit
+; Register roles in the loop: xmm1/xmm2 = SHA state halves, xmm8/xmm9 = state saved for the final add,
+; xmm10-xmm13 = message schedule, xmm0 = K+W operand implicitly consumed by sha256rnds2,
+; xmm6 = AES/CBC working block, xmm4/xmm5 = alternating round keys, xmm15 = round key 0, xmm3/xmm7 = pshufb mask.
+ mov r10,QWORD[56+rsp] ; arg7 (stack) -> r10: data to hash, 64 bytes consumed per loop pass
+ lea rsp,[((-168))+rsp] ; 160 bytes for xmm6-xmm15 plus 8 so the save area is 16-byte aligned for movaps
+ movaps XMMWORD[(-8-160)+rax],xmm6 ; xmm6-xmm15 are callee-saved on Win64; saved at new rsp+0 .. rsp+144
+ movaps XMMWORD[(-8-144)+rax],xmm7
+ movaps XMMWORD[(-8-128)+rax],xmm8
+ movaps XMMWORD[(-8-112)+rax],xmm9
+ movaps XMMWORD[(-8-96)+rax],xmm10
+ movaps XMMWORD[(-8-80)+rax],xmm11
+ movaps XMMWORD[(-8-64)+rax],xmm12
+ movaps XMMWORD[(-8-48)+rax],xmm13
+ movaps XMMWORD[(-8-32)+rax],xmm14
+ movaps XMMWORD[(-8-16)+rax],xmm15
+$L$prologue_shaext: ; SEH handler data: below this label the frame is not yet fully set up
+ lea rax,[((K256+128))] ; rax = K256 biased by +128; rows are addressed as [(off-128)+rax]
+ movdqu xmm1,XMMWORD[r9] ; first 16 bytes of the hash state
+ movdqu xmm2,XMMWORD[16+r9] ; second 16 bytes of the hash state
+ movdqa xmm3,XMMWORD[((512-128))+rax] ; 16 bytes at K256+512, used below as the pshufb mask (presumably a big-endian byte swap; the table entry is outside this view)
+; AES setup
+ mov r11d,DWORD[240+rcx] ; dword at key+240 (presumably the round-count field); compared with 11 to pick the schedule length
+ sub rsi,rdi ; rsi = out - in, so ciphertext is stored at [rdi+rsi+k]
+ movups xmm15,XMMWORD[rcx] ; round key 0
+ movups xmm6,XMMWORD[r8] ; CBC chaining value (IV on the first block)
+ movups xmm4,XMMWORD[16+rcx] ; round key 1
+ lea rcx,[112+rcx] ; bias the key pointer: round key i is now at [16*i-112+rcx]
+; Permute the eight state words into the two-register layout taken by sha256rnds2 (xmm1 as src2, xmm2 as src1/dest)
+ pshufd xmm0,xmm1,0x1b
+ pshufd xmm1,xmm1,0xb1
+ pshufd xmm2,xmm2,0x1b
+ movdqa xmm7,xmm3 ; keep a copy of the mask; xmm3 doubles as palignr scratch inside the loop
+DB 102,15,58,15,202,8 ; palignr xmm1,xmm2,8
+ punpcklqdq xmm2,xmm0
+
+ jmp NEAR $L$oop_shaext
+
+ALIGN 16
+$L$oop_shaext: ; one pass = 64 SHA-256 rounds over [r10..r10+63] interleaved with 4 AES-CBC blocks from [rdi..rdi+63]
+ movdqu xmm10,XMMWORD[r10] ; load the 64-byte message block into xmm10-xmm13
+ movdqu xmm11,XMMWORD[16+r10]
+ movdqu xmm12,XMMWORD[32+r10]
+DB 102,68,15,56,0,211 ; pshufb xmm10,xmm3
+ movdqu xmm13,XMMWORD[48+r10]
+
+ movdqa xmm0,XMMWORD[((0-128))+rax] ; K256 row for rounds 0-3 (row stride is 32: each 16-byte row is stored twice in K256)
+ paddd xmm0,xmm10 ; xmm0 = K + W
+DB 102,68,15,56,0,219 ; pshufb xmm11,xmm3
+ movdqa xmm9,xmm2 ; save the incoming state for the feed-forward add at the end of the pass
+ movdqa xmm8,xmm1
+ movups xmm14,XMMWORD[rdi] ; AES block 0: plaintext
+ xorps xmm14,xmm15 ; plaintext ^ round key 0
+ xorps xmm6,xmm14 ; ^ chaining value = CBC input after the initial key whitening
+ movups xmm5,XMMWORD[((-80))+rcx] ; round key 2
+ aesenc xmm6,xmm4 ; round 1
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1 (xmm0 implicit): two rounds
+ pshufd xmm0,xmm0,0x0e ; move the upper two K+W dwords down for the next two rounds
+ movups xmm4,XMMWORD[((-64))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2 (xmm0 implicit): two more rounds
+
+ movdqa xmm0,XMMWORD[((32-128))+rax] ; K256 row for rounds 4-7
+ paddd xmm0,xmm11
+DB 102,68,15,56,0,227 ; pshufb xmm12,xmm3
+ lea r10,[64+r10] ; advance the hash input pointer (the whole block is already in registers)
+ movups xmm5,XMMWORD[((-48))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movups xmm4,XMMWORD[((-32))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+
+ movdqa xmm0,XMMWORD[((64-128))+rax] ; K256 row for rounds 8-11
+ paddd xmm0,xmm12
+DB 102,68,15,56,0,235 ; pshufb xmm13,xmm3
+DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
+ movups xmm5,XMMWORD[((-16))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm13 ; xmm3 becomes scratch from here on (mask is restored from xmm7 later)
+DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
+ paddd xmm10,xmm3
+ movups xmm4,XMMWORD[rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+
+ movdqa xmm0,XMMWORD[((96-128))+rax] ; K256 row for rounds 12-15
+ paddd xmm0,xmm13
+DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
+DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
+ movups xmm5,XMMWORD[16+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movups xmm4,XMMWORD[32+rcx]
+ aesenc xmm6,xmm5
+ movdqa xmm3,xmm10
+DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
+ paddd xmm11,xmm3
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((128-128))+rax] ; K256 row for rounds 16-19
+ paddd xmm0,xmm10
+DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
+DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
+ movups xmm5,XMMWORD[48+rcx] ; round key 10
+ aesenc xmm6,xmm4 ; round 9
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm11
+DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
+ paddd xmm12,xmm3
+ cmp r11d,11 ; flags stay live for both jb and je below (movups/aesenc do not touch them)
+ jb NEAR $L$aesenclast1 ; r11d < 11: 10-round schedule, round key 10 is the last one
+ movups xmm4,XMMWORD[64+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[80+rcx]
+ aesenc xmm6,xmm4
+ je NEAR $L$aesenclast1 ; r11d == 11: 12-round schedule
+ movups xmm4,XMMWORD[96+rcx] ; otherwise: 14-round schedule
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[112+rcx]
+ aesenc xmm6,xmm4
+$L$aesenclast1:
+ aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 0
+ movups xmm4,XMMWORD[((16-112))+rcx] ; reload round key 1 for the next AES block
+ nop
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movups xmm14,XMMWORD[16+rdi] ; AES block 1: plaintext
+ xorps xmm14,xmm15
+ movups XMMWORD[rdi*1+rsi],xmm6 ; store ciphertext block 0 at out+0
+ xorps xmm6,xmm14 ; previous ciphertext chains into the next block (CBC)
+ movups xmm5,XMMWORD[((-80))+rcx]
+ aesenc xmm6,xmm4
+ movdqa xmm0,XMMWORD[((160-128))+rax] ; K256 row for rounds 20-23
+ paddd xmm0,xmm11
+DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
+DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
+ movups xmm4,XMMWORD[((-64))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm12
+DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
+ paddd xmm13,xmm3
+ movups xmm5,XMMWORD[((-48))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((192-128))+rax] ; K256 row for rounds 24-27
+ paddd xmm0,xmm12
+DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
+DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
+ movups xmm4,XMMWORD[((-32))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm13
+DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
+ paddd xmm10,xmm3
+ movups xmm5,XMMWORD[((-16))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((224-128))+rax] ; K256 row for rounds 28-31
+ paddd xmm0,xmm13
+DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
+DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
+ movups xmm4,XMMWORD[rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm10
+DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
+ paddd xmm11,xmm3
+ movups xmm5,XMMWORD[16+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((256-128))+rax] ; K256 row for rounds 32-35
+ paddd xmm0,xmm10
+DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
+DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
+ movups xmm4,XMMWORD[32+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm11
+DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
+ paddd xmm12,xmm3
+ movups xmm5,XMMWORD[48+rcx]
+ aesenc xmm6,xmm4
+ cmp r11d,11 ; same schedule-length dispatch as for block 0
+ jb NEAR $L$aesenclast2
+ movups xmm4,XMMWORD[64+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[80+rcx]
+ aesenc xmm6,xmm4
+ je NEAR $L$aesenclast2
+ movups xmm4,XMMWORD[96+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[112+rcx]
+ aesenc xmm6,xmm4
+$L$aesenclast2:
+ aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 1
+ movups xmm4,XMMWORD[((16-112))+rcx] ; reload round key 1
+ nop
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movups xmm14,XMMWORD[32+rdi] ; AES block 2: plaintext
+ xorps xmm14,xmm15
+ movups XMMWORD[16+rdi*1+rsi],xmm6 ; store ciphertext block 1 at out+16
+ xorps xmm6,xmm14
+ movups xmm5,XMMWORD[((-80))+rcx]
+ aesenc xmm6,xmm4
+ movdqa xmm0,XMMWORD[((288-128))+rax] ; K256 row for rounds 36-39
+ paddd xmm0,xmm11
+DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
+DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
+ movups xmm4,XMMWORD[((-64))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm12
+DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
+ paddd xmm13,xmm3
+ movups xmm5,XMMWORD[((-48))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((320-128))+rax] ; K256 row for rounds 40-43
+ paddd xmm0,xmm12
+DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
+DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
+ movups xmm4,XMMWORD[((-32))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm13
+DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
+ paddd xmm10,xmm3
+ movups xmm5,XMMWORD[((-16))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((352-128))+rax] ; K256 row for rounds 44-47
+ paddd xmm0,xmm13
+DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
+DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
+ movups xmm4,XMMWORD[rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm10
+DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
+ paddd xmm11,xmm3
+ movups xmm5,XMMWORD[16+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((384-128))+rax] ; K256 row for rounds 48-51
+ paddd xmm0,xmm10
+DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
+DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
+ movups xmm4,XMMWORD[32+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm11
+DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
+ paddd xmm12,xmm3
+ movups xmm5,XMMWORD[48+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+ movdqa xmm0,XMMWORD[((416-128))+rax] ; K256 row for rounds 52-55
+ paddd xmm0,xmm11
+DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
+DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
+ cmp r11d,11 ; same schedule-length dispatch as for block 0
+ jb NEAR $L$aesenclast3
+ movups xmm4,XMMWORD[64+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[80+rcx]
+ aesenc xmm6,xmm4
+ je NEAR $L$aesenclast3
+ movups xmm4,XMMWORD[96+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[112+rcx]
+ aesenc xmm6,xmm4
+$L$aesenclast3:
+ aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 2
+ movups xmm4,XMMWORD[((16-112))+rcx] ; reload round key 1
+ nop
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm3,xmm12
+DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
+ paddd xmm13,xmm3
+ movups xmm14,XMMWORD[48+rdi] ; AES block 3: plaintext
+ xorps xmm14,xmm15
+ movups XMMWORD[32+rdi*1+rsi],xmm6 ; store ciphertext block 2 at out+32
+ xorps xmm6,xmm14
+ movups xmm5,XMMWORD[((-80))+rcx]
+ aesenc xmm6,xmm4
+ movups xmm4,XMMWORD[((-64))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+
+ movdqa xmm0,XMMWORD[((448-128))+rax] ; K256 row for rounds 56-59
+ paddd xmm0,xmm12
+DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
+ movdqa xmm3,xmm7 ; message schedule is complete: restore the pshufb mask for the next pass
+ movups xmm5,XMMWORD[((-48))+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movups xmm4,XMMWORD[((-32))+rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
+
+ movdqa xmm0,XMMWORD[((480-128))+rax] ; K256 row for rounds 60-63
+ paddd xmm0,xmm13
+ movups xmm5,XMMWORD[((-16))+rcx]
+ aesenc xmm6,xmm4
+ movups xmm4,XMMWORD[rcx]
+ aesenc xmm6,xmm5
+DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
+ pshufd xmm0,xmm0,0x0e
+ movups xmm5,XMMWORD[16+rcx]
+ aesenc xmm6,xmm4
+DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2 (last two rounds of this block)
+
+ movups xmm4,XMMWORD[32+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[48+rcx]
+ aesenc xmm6,xmm4
+ cmp r11d,11 ; same schedule-length dispatch as for block 0
+ jb NEAR $L$aesenclast4
+ movups xmm4,XMMWORD[64+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[80+rcx]
+ aesenc xmm6,xmm4
+ je NEAR $L$aesenclast4
+ movups xmm4,XMMWORD[96+rcx]
+ aesenc xmm6,xmm5
+ movups xmm5,XMMWORD[112+rcx]
+ aesenc xmm6,xmm4
+$L$aesenclast4:
+ aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 3, also the chaining value for the next pass
+ movups xmm4,XMMWORD[((16-112))+rcx] ; reload round key 1 for the next pass
+ nop
+
+ paddd xmm2,xmm9 ; feed-forward: add the state saved at the top of the pass
+ paddd xmm1,xmm8
+
+ dec rdx ; sets ZF for the jnz below; movups and lea in between leave flags untouched
+ movups XMMWORD[48+rdi*1+rsi],xmm6 ; store ciphertext block 3 at out+48
+ lea rdi,[64+rdi] ; advance AES input (and, through the rsi delta, output) by 64 bytes
+ jnz NEAR $L$oop_shaext
+; Loop finished: permute the state back (presumably the inverse of the entry shuffle) before storing it
+ pshufd xmm2,xmm2,0xb1
+ pshufd xmm3,xmm1,0x1b
+ pshufd xmm1,xmm1,0xb1
+ punpckhqdq xmm1,xmm2
+DB 102,15,58,15,211,8 ; palignr xmm2,xmm3,8
+
+ movups XMMWORD[r8],xmm6 ; write back the last ciphertext block as the updated chaining value
+ movdqu XMMWORD[r9],xmm1 ; write back the updated hash state
+ movdqu XMMWORD[16+r9],xmm2
+ movaps xmm6,XMMWORD[rsp] ; restore callee-saved xmm6-xmm15 from the save area
+ movaps xmm7,XMMWORD[16+rsp]
+ movaps xmm8,XMMWORD[32+rsp]
+ movaps xmm9,XMMWORD[48+rsp]
+ movaps xmm10,XMMWORD[64+rsp]
+ movaps xmm11,XMMWORD[80+rsp]
+ movaps xmm12,XMMWORD[96+rsp]
+ movaps xmm13,XMMWORD[112+rsp]
+ movaps xmm14,XMMWORD[128+rsp]
+ movaps xmm15,XMMWORD[144+rsp]
+ lea rsp,[((8+160))+rsp] ; release the 168-byte frame; rsp is back at its entry value
+$L$epilogue_shaext: ; SEH handler data: from this label on the frame is already released
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the home slots written in the prologue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_aesni_cbc_sha256_enc_shaext:
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+se_handler: ; Shared Win64 language-specific exception handler for the functions listed in .pdata: rebuilds the interrupted function's caller context, then defers to RtlVirtualUnwind. Uses r8 (context record) and r9 (dispatcher context); structure offsets follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts.
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; 32 bytes of home space + 4 stack arguments for the call below; 9 pushes + 64 keeps rsp 16-byte aligned
+; Decide how far the interrupted function had progressed
+ mov rax,QWORD[120+r8] ; context->Rax: the visible prologues do "mov rax,rsp", so before the prologue label this is the entry rsp
+ mov rbx,QWORD[248+r8] ; context->Rip
+
+ mov rsi,QWORD[8+r9] ; disp->ImageBase
+ mov r11,QWORD[56+r9] ; disp->HandlerData: the two RVAs stored after the handler RVA in .xdata
+
+ mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the function's prologue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue ; fault before the prologue label: nothing but rdi/rsi to recover
+
+ mov rax,QWORD[152+r8] ; context->Rsp
+
+ mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the function's epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue ; fault at/after the epilogue label: frame already released, rsp is the entry rsp
+ lea r10,[aesni_cbc_sha256_enc_shaext]
+ cmp rbx,r10
+ jb NEAR $L$not_in_shaext
+; shaext frame: xmm6-xmm15 sit at rsp+0..159 and the frame is 168 bytes; no integer registers were pushed
+ lea rsi,[rax]
+ lea rdi,[512+r8] ; &context->Xmm6
+ mov ecx,20 ; 20 qwords = 160 bytes = xmm6..xmm15
+ DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
+ lea rax,[168+rax] ; undo "lea rsp,[-168+rsp]" to get the entry rsp
+ jmp NEAR $L$in_prologue
+$L$not_in_shaext:
+ lea r10,[$L$avx2_shortcut]
+ cmp rbx,r10
+ jb NEAR $L$not_in_avx2
+; AVX2 path: its rsp moves inside the body, so recompute the fixed frame base from the 1024-byte aligned address
+ and rax,-256*4
+ add rax,448
+$L$not_in_avx2:
+ mov rsi,rax ; rsi = frame base
+ mov rax,QWORD[((64+56))+rax] ; original rsp saved in the frame (same slot the epilogues read)
+; The six callee-saved integer registers were pushed just below the original rsp
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx ; context->Rbx
+ mov QWORD[160+r8],rbp ; context->Rbp
+ mov QWORD[216+r8],r12 ; context->R12
+ mov QWORD[224+r8],r13 ; context->R13
+ mov QWORD[232+r8],r14 ; context->R14
+ mov QWORD[240+r8],r15 ; context->R15
+; Recover xmm6-xmm15 from the frame
+ lea rsi,[((64+64))+rsi] ; XMM save area starts at frame base + 128
+ lea rdi,[512+r8] ; &context->Xmm6
+ mov ecx,20 ; 20 qwords = 160 bytes
+ DD 0xa548f3fc ; cld; rep movsq
+; Common tail: rax = rsp as it was on entry to the interrupted function
+$L$in_prologue:
+ mov rdi,QWORD[8+rax] ; rdi/rsi were stored in the home slots by the WIN64 prologue
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax ; context->Rsp
+ mov QWORD[168+r8],rsi ; context->Rsi
+ mov QWORD[176+r8],rdi ; context->Rdi
+; Copy the patched context into the dispatcher's context record
+ mov rdi,QWORD[40+r9] ; disp->ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords = 1232 bytes, the size of CONTEXT
+ DD 0xa548f3fc ; cld; rep movsq
+; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord, &HandlerData, &EstablisherFrame, NULL)
+ mov rsi,r9
+ xor rcx,rcx ; HandlerType = 0 (UNW_FLAG_NHANDLER)
+ mov rdx,QWORD[8+rsi] ; disp->ImageBase
+ mov r8,QWORD[rsi] ; disp->ControlPc
+ mov r9,QWORD[16+rsi] ; disp->FunctionEntry
+ mov r10,QWORD[40+rsi] ; disp->ContextRecord
+ lea r11,[56+rsi] ; &disp->HandlerData
+ lea r12,[24+rsi] ; &disp->EstablisherFrame
+ mov QWORD[32+rsp],r10 ; stack arguments 5-8 go above the 32-byte home space
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx ; last argument = NULL
+ call QWORD[__imp_RtlVirtualUnwind]
+; Return 1 (ExceptionContinueSearch): this handler never handles the exception itself
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4 ; function table: one (begin RVA, end RVA, unwind-info RVA) triple per code path
+ DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
+ DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
+ DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase
+
+ DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
+ DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
+ DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
+ DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
+ DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
+ DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
+ DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
+ DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
+ DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase
+section .xdata rdata align=8 ; unwind info: every entry names se_handler and passes it two label RVAs as handler data
+ALIGN 8
+$L$SEH_info_aesni_cbc_sha256_enc_xop:
+DB 9,0,0,0 ; UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER, zero prologue size and zero unwind codes
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase ; read by se_handler as HandlerData[0] and HandlerData[1]
+
+$L$SEH_info_aesni_cbc_sha256_enc_avx:
+DB 9,0,0,0 ; same header as above
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase ; HandlerData[0], HandlerData[1]
+$L$SEH_info_aesni_cbc_sha256_enc_avx2:
+DB 9,0,0,0 ; same header as above
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase ; HandlerData[0], HandlerData[1]
+$L$SEH_info_aesni_cbc_sha256_enc_shaext:
+DB 9,0,0,0 ; same header as above
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase ; HandlerData[0], HandlerData[1]