;-----------------------------------------------------------------------
; GF(2^m) 2x2-word carry-less multiplication, x86-64.
;
; Syntax : NASM (Intel operand order), one statement per line.
; Target : Win64 (Microsoft x64 calling convention; .pdata/.xdata
;          unwind records and an SEH language handler are provided).
;
; NOTE: NASM is line-oriented and ';' comments run to end of line, so
; every statement below must stay on its own physical line.  Fusing
; lines makes the ';repret' comments swallow the labels, EXTERN/global
; directives and code that follow them.
;-----------------------------------------------------------------------

default rel                             ; RIP-relative addressing by default

; Size keywords used by the generator; defined empty so that operands
; such as XMMWORD[rcx] expand to plain [rcx].
%define XMMWORD
%define YMMWORD
%define ZMMWORD

section .text code align=64

;-----------------------------------------------------------------------
; _mul_1x1 -- internal helper, 64x64 -> 128-bit carry-less multiply.
;
; Private register convention (NOT the Win64 ABI):
;   In:    rax = a, rbp = b, r8 = nibble mask (caller passes 0xf)
;   Out:   rdx:rax = a * b over GF(2)[x]   (rax = low, rdx = high)
;   Clobb: rbx, rcx, rsi, rdi, rbp, r9-r14, xmm0, xmm1, flags
;   Stack: 136 bytes; [rsp+0 .. rsp+127] holds a 16-entry qword table.
;
; Method: the top three bits of a are handled separately; the remaining
; 61 bits (a1) feed a table tab[i] = i * a1 (carry-less, i = 0..15).
; b is then consumed four bits at a time.  Even nibbles are accumulated
; in xmm0 (byte-shifted with pslldq), odd nibbles in rdx:rax.
; The instruction order is interleaved on purpose; do not reorder.
;-----------------------------------------------------------------------
ALIGN   16
_mul_1x1:
        sub     rsp,128+8               ; table + 8 bytes (see SEH_info_1x1)

        ; --- split off the top three bits of a ----------------------
        mov     r9,-1
        lea     rsi,[rax*1+rax]         ; rsi = a << 1
        shr     r9,3                    ; r9  = 0x1fffffffffffffff
        lea     rdi,[rax*4]             ; rdi = a << 2
        and     r9,rax                  ; r9  = a1 = a with bits 63..61 cleared
        lea     r12,[rax*8]             ; r12 = a8 = a << 3
        sar     rax,63                  ; broadcast bit 63 of a
        lea     r10,[r9*1+r9]           ; r10 = a2 = a1 << 1
        sar     rsi,63                  ; broadcast bit 62 of a
        lea     r11,[r9*4]              ; r11 = a4 = a1 << 2
        and     rax,rbp                 ; b if bit 63 set, else 0
        sar     rdi,63                  ; broadcast bit 61 of a
        mov     rdx,rax
        shl     rax,63                  ; lo  = (b << 63) for bit 63
        and     rsi,rbp                 ; b if bit 62 set, else 0
        shr     rdx,1                   ; hi  = (b >> 1)  for bit 63
        mov     rcx,rsi
        shl     rsi,62
        and     rdi,rbp                 ; b if bit 61 set, else 0
        shr     rcx,2
        xor     rax,rsi                 ; lo ^= b << 62
        mov     rbx,rdi
        shl     rdi,61
        xor     rdx,rcx                 ; hi ^= b >> 2
        shr     rbx,3
        xor     rax,rdi                 ; lo ^= b << 61
        xor     rdx,rbx                 ; hi ^= b >> 3

        ; --- build tab[0..15] at [rsp + 8*i] ------------------------
        mov     r13,r9
        mov     QWORD[rsp],0            ; tab[0]  = 0
        xor     r13,r10                 ; r13 = a1^a2
        mov     QWORD[8+rsp],r9         ; tab[1]  = a1
        mov     r14,r11
        mov     QWORD[16+rsp],r10       ; tab[2]  = a2
        xor     r14,r12                 ; r14 = a4^a8
        mov     QWORD[24+rsp],r13       ; tab[3]  = a1^a2

        xor     r9,r11                  ; r9  = a1^a4
        mov     QWORD[32+rsp],r11       ; tab[4]  = a4
        xor     r10,r11                 ; r10 = a2^a4
        mov     QWORD[40+rsp],r9        ; tab[5]  = a1^a4
        xor     r13,r11                 ; r13 = a1^a2^a4
        mov     QWORD[48+rsp],r10       ; tab[6]  = a2^a4
        xor     r9,r14                  ; r9  = a1^a8
        mov     QWORD[56+rsp],r13       ; tab[7]  = a1^a2^a4
        xor     r10,r14                 ; r10 = a2^a8

        mov     QWORD[64+rsp],r12       ; tab[8]  = a8
        xor     r13,r14                 ; r13 = a1^a2^a8
        mov     QWORD[72+rsp],r9        ; tab[9]  = a1^a8
        xor     r9,r11                  ; r9  = a1^a4^a8
        mov     QWORD[80+rsp],r10       ; tab[10] = a2^a8
        xor     r10,r11                 ; r10 = a2^a4^a8
        mov     QWORD[88+rsp],r13       ; tab[11] = a1^a2^a8

        xor     r13,r11                 ; r13 = a1^a2^a4^a8
        mov     QWORD[96+rsp],r14       ; tab[12] = a4^a8
        mov     rsi,r8
        mov     QWORD[104+rsp],r9       ; tab[13] = a1^a4^a8
        and     rsi,rbp                 ; rsi = nibble 0 of b
        mov     QWORD[112+rsp],r10      ; tab[14] = a2^a4^a8
        shr     rbp,4
        mov     QWORD[120+rsp],r13      ; tab[15] = a1^a2^a4^a8
        mov     rdi,r8
        and     rdi,rbp                 ; rdi = nibble 1 of b
        shr     rbp,4

        ; --- nibble 0 (xmm accumulator seed) ------------------------
        movq    xmm0,QWORD[rsi*8+rsp]   ; xmm0 = tab[nibble 0]
        mov     rsi,r8
        and     rsi,rbp                 ; rsi = nibble 2
        shr     rbp,4

        ; --- nibbles 1 (integer, <<4) and 2 (xmm, <<8) --------------
        mov     rcx,QWORD[rdi*8+rsp]    ; rcx = tab[nibble 1]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,4
        and     rdi,rbp                 ; rdi = nibble 3
        movq    xmm1,QWORD[rsi*8+rsp]   ; xmm1 = tab[nibble 2]
        shr     rbx,60
        xor     rax,rcx                 ; lo ^= tab << 4
        pslldq  xmm1,1                  ; shift left by 1 byte (8 bits)
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx                 ; hi ^= tab >> 60
        and     rsi,rbp                 ; rsi = nibble 4
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 3 (integer, <<12) and 4 (xmm, <<16) ------------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,12
        and     rdi,rbp                 ; rdi = nibble 5
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,52
        xor     rax,rcx
        pslldq  xmm1,2
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 6
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 5 (integer, <<20) and 6 (xmm, <<24) ------------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,20
        and     rdi,rbp                 ; rdi = nibble 7
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,44
        xor     rax,rcx
        pslldq  xmm1,3
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 8
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 7 (integer, <<28) and 8 (xmm, <<32) ------------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,28
        and     rdi,rbp                 ; rdi = nibble 9
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,36
        xor     rax,rcx
        pslldq  xmm1,4
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 10
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 9 (integer, <<36) and 10 (xmm, <<40) -----------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,36
        and     rdi,rbp                 ; rdi = nibble 11
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,28
        xor     rax,rcx
        pslldq  xmm1,5
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 12
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 11 (integer, <<44) and 12 (xmm, <<48) ----------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,44
        and     rdi,rbp                 ; rdi = nibble 13
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,20
        xor     rax,rcx
        pslldq  xmm1,6
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 14
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibbles 13 (integer, <<52) and 14 (xmm, <<56) ----------
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,52
        and     rdi,rbp                 ; rdi = nibble 15
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,12
        xor     rax,rcx
        pslldq  xmm1,7
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; value unused: rsi is overwritten below
        shr     rbp,4
        pxor    xmm0,xmm1

        ; --- nibble 15 (integer, <<60) and fold xmm0 into rdx:rax ---
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rbx,rcx
        shl     rcx,60
DB      102,72,15,126,198               ; movq rsi,xmm0  (low 64 bits)
        shr     rbx,4
        xor     rax,rcx
        psrldq  xmm0,8                  ; bring high 64 bits down
        xor     rdx,rbx
DB      102,72,15,126,199               ; movq rdi,xmm0  (high 64 bits)
        xor     rax,rsi
        xor     rdx,rdi

        add     rsp,128+8
        DB      0F3h,0C3h               ; rep ret
$L$end_mul_1x1:

EXTERN  OPENSSL_ia32cap_P
global  bn_GF2m_mul_2x2

;-----------------------------------------------------------------------
; bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply.
;
; ABI:   Microsoft x64.
; In:    rcx      = pointer to four output qwords, r[0..3]
;        rdx      = a1 (high word of a)     r8        = a0 (low word)
;        r9       = b1 (high word of b)     [rsp+40]  = b0 (low word)
;        (a1/a0/b1/b0 are names used here for exposition only.)
; Out:   r[0..3]  = a * b over GF(2)[x], least-significant qword first,
;        computed as a1*b1, a0*b0 and (a0^a1)*(b0^b1) (Karatsuba).
; Clobb: rax, r10, xmm0-xmm5, flags on the fast path; the fallback path
;        also uses rbx, rbp, rsi, rdi, r12-r14 but saves/restores them.
;
; Dispatch: bit 33 of the first qword of OPENSSL_ia32cap_P selects the
; carry-less-multiply-instruction path when set.
; NOTE(review): presumably this is the PCLMULQDQ CPUID feature flag --
; confirm against the definition of OPENSSL_ia32cap_P.
;-----------------------------------------------------------------------
ALIGN   16
bn_GF2m_mul_2x2:
        mov     rax,rsp                 ; entry rsp; se_handler reads it from context->Rax
        mov     r10,QWORD[OPENSSL_ia32cap_P]
        bt      r10,33
        jnc     NEAR $L$vanilla_mul_2x2

        ; --- fast path: three PCLMULQDQ multiplications --------------
DB      102,72,15,110,194               ; movq xmm0,rdx          (a1)
DB      102,73,15,110,201               ; movq xmm1,r9           (b1)
DB      102,73,15,110,208               ; movq xmm2,r8           (a0)
        movq    xmm3,QWORD[40+rsp]      ; b0 (fifth argument)
        movdqa  xmm4,xmm0
        movdqa  xmm5,xmm1
DB      102,15,58,68,193,0              ; pclmulqdq xmm0,xmm1,0  (a1*b1)
        pxor    xmm4,xmm2               ; a0^a1
        pxor    xmm5,xmm3               ; b0^b1
DB      102,15,58,68,211,0              ; pclmulqdq xmm2,xmm3,0  (a0*b0)
DB      102,15,58,68,229,0              ; pclmulqdq xmm4,xmm5,0  ((a0^a1)*(b0^b1))
        xorps   xmm4,xmm0
        xorps   xmm4,xmm2               ; xmm4 = middle term
        movdqa  xmm5,xmm4
        pslldq  xmm4,8                  ; low  half of middle term -> bits 64..127
        psrldq  xmm5,8                  ; high half of middle term -> bits 0..63
        pxor    xmm2,xmm4
        pxor    xmm0,xmm5
        movdqu  XMMWORD[rcx],xmm2       ; r[0], r[1]
        movdqu  XMMWORD[16+rcx],xmm0    ; r[2], r[3]
        DB      0F3h,0C3h               ; rep ret

        ; --- fallback path: three calls to _mul_1x1 ------------------
        ; Frame (136 bytes):
        ;   [  0.. 15] a0*b0          [ 16.. 31] a1*b1
        ;   [ 32.. 71] saved rcx, rdx, r8, r9, b0
        ;   [ 80..135] saved r14, r13, r12, rbp, rbx, rdi, rsi
ALIGN   16
$L$vanilla_mul_2x2:
        lea     rsp,[((-136))+rsp]

        mov     r10,QWORD[176+rsp]      ; b0 = [entry rsp + 40]
        mov     QWORD[120+rsp],rdi
        mov     QWORD[128+rsp],rsi
        mov     QWORD[80+rsp],r14
        mov     QWORD[88+rsp],r13
        mov     QWORD[96+rsp],r12
        mov     QWORD[104+rsp],rbp
        mov     QWORD[112+rsp],rbx
$L$body_mul_2x2:
        mov     QWORD[32+rsp],rcx       ; save the arguments
        mov     QWORD[40+rsp],rdx       ; a1
        mov     QWORD[48+rsp],r8        ; a0
        mov     QWORD[56+rsp],r9        ; b1
        mov     QWORD[64+rsp],r10       ; b0

        mov     r8,0xf                  ; nibble mask for _mul_1x1
        mov     rax,rdx
        mov     rbp,r9
        call    _mul_1x1                ; a1*b1
        mov     QWORD[16+rsp],rax
        mov     QWORD[24+rsp],rdx

        mov     rax,QWORD[48+rsp]
        mov     rbp,QWORD[64+rsp]
        call    _mul_1x1                ; a0*b0
        mov     QWORD[rsp],rax
        mov     QWORD[8+rsp],rdx

        mov     rax,QWORD[40+rsp]
        mov     rbp,QWORD[56+rsp]
        xor     rax,QWORD[48+rsp]
        xor     rbp,QWORD[64+rsp]
        call    _mul_1x1                ; (a0^a1)*(b0^b1) -> rdx:rax
        mov     rbx,QWORD[rsp]          ; rbx = lo(a0*b0)
        mov     rcx,QWORD[8+rsp]        ; rcx = hi(a0*b0)
        mov     rdi,QWORD[16+rsp]       ; rdi = lo(a1*b1)
        mov     rsi,QWORD[24+rsp]       ; rsi = hi(a1*b1)
        mov     rbp,QWORD[32+rsp]       ; rbp = output pointer

        ; Karatsuba recombination; on exit
        ;   rdx = r[2] = hi(mid) ^ hi(a0b0) ^ lo(a1b1) ^ hi(a1b1)
        ;   rax = r[1] = lo(mid) ^ lo(a0b0) ^ hi(a0b0) ^ lo(a1b1)
        xor     rax,rdx
        xor     rdx,rcx
        xor     rax,rbx
        mov     QWORD[rbp],rbx          ; r[0] = lo(a0*b0)
        xor     rdx,rdi
        mov     QWORD[24+rbp],rsi       ; r[3] = hi(a1*b1)
        xor     rax,rsi
        xor     rdx,rsi
        xor     rax,rdx
        mov     QWORD[16+rbp],rdx       ; r[2]
        mov     QWORD[8+rbp],rax        ; r[1]

        mov     r14,QWORD[80+rsp]
        mov     r13,QWORD[88+rsp]
        mov     r12,QWORD[96+rsp]
        mov     rbp,QWORD[104+rsp]
        mov     rbx,QWORD[112+rsp]
        mov     rdi,QWORD[120+rsp]
        mov     rsi,QWORD[128+rsp]
        lea     rsp,[136+rsp]
$L$epilogue_mul_2x2:
        DB      0F3h,0C3h               ; rep ret
$L$end_mul_2x2:

; ASCII, NUL-terminated:
; "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB      71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
DB      99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
DB      52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
DB      32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
DB      111,114,103,62,0
ALIGN   16
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler for the
; fallback path of bn_GF2m_mul_2x2 (referenced from $L$SEH_info_2x2).
;
; In:    r8 = context record, r9 = dispatcher context (the only two
;        arguments this code reads).
; Out:   eax = 1.
;
; If context Rip lies in [$L$body_mul_2x2, $L$epilogue_mul_2x2) the
; registers saved in the 136-byte frame are written back into the
; context and the frame is popped; otherwise only Rsp is fixed up.
; The context is then copied to the dispatcher's ContextRecord and
; RtlVirtualUnwind is called.
;
; Numeric offsets follow the Windows x64 CONTEXT and DISPATCHER_CONTEXT
; layouts (field names in the comments are taken from those layouts).
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; home space + 4 stack arguments

        mov     rax,QWORD[120+r8]       ; context->Rax (= entry rsp before the body label)
        mov     rbx,QWORD[248+r8]       ; context->Rip

        lea     r10,[$L$body_mul_2x2]
        cmp     rbx,r10                 ; Rip < body label: frame not yet complete
        jb      NEAR $L$in_prologue

        mov     rax,QWORD[152+r8]       ; context->Rsp

        lea     r10,[$L$epilogue_mul_2x2]
        cmp     rbx,r10                 ; Rip >= epilogue label: frame already released
        jae     NEAR $L$in_prologue

        mov     r14,QWORD[80+rax]       ; mimic the function epilogue
        mov     r13,QWORD[88+rax]
        mov     r12,QWORD[96+rax]
        mov     rbp,QWORD[104+rax]
        mov     rbx,QWORD[112+rax]
        mov     rdi,QWORD[120+rax]
        mov     rsi,QWORD[128+rax]

        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14

        lea     rax,[136+rax]           ; rsp as it was at function entry

$L$in_prologue:
        mov     QWORD[152+r8],rax       ; context->Rsp

        mov     rdi,QWORD[40+r9]        ; disp->ContextRecord (copy destination)
        mov     rsi,r8                  ; context (copy source)
        mov     ecx,154                 ; 154 qwords = 1232 bytes
        DD      0xa548f3fc              ; bytes FC F3 48 A5: cld; rep movsq

        mov     rsi,r9
        xor     rcx,rcx                 ; arg1 = 0
        mov     rdx,QWORD[8+rsi]        ; arg2 = disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3 = disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4 = disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5
        mov     QWORD[40+rsp],r11       ; arg6
        mov     QWORD[48+rsp],r12       ; arg7
        mov     QWORD[56+rsp],rcx       ; arg8 = NULL
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; return value 1
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ; rep ret

;-----------------------------------------------------------------------
; Function table: one (begin RVA, end RVA, unwind-info RVA) triple per
; covered range.
;-----------------------------------------------------------------------
section .pdata rdata align=4
ALIGN   4
        DD      _mul_1x1 wrt ..imagebase
        DD      $L$end_mul_1x1 wrt ..imagebase
        DD      $L$SEH_info_1x1 wrt ..imagebase

        DD      $L$vanilla_mul_2x2 wrt ..imagebase
        DD      $L$end_mul_2x2 wrt ..imagebase
        DD      $L$SEH_info_2x2 wrt ..imagebase

;-----------------------------------------------------------------------
; Unwind information.
;-----------------------------------------------------------------------
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_1x1:
DB      0x01,0x07,0x02,0x00             ; version 1, prologue 7 bytes, 2 code slots
DB      0x07,0x01,0x11,0x00             ; at offset 7: large alloc, 0x11*8 = 136 (sub rsp,128+8)
$L$SEH_info_2x2:
DB      9,0,0,0                         ; version 1 + exception-handler flag, no unwind codes
        DD      se_handler wrt ..imagebase