summaryrefslogtreecommitdiffstats
path: root/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
diff options
context:
space:
mode:
Diffstat (limited to 'CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S')
-rw-r--r--CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S494
1 files changed, 494 insertions, 0 deletions
diff --git a/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
new file mode 100644
index 0000000000..73fad9f75d
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-GCC/crypto/bn/x86-mont.S
@@ -0,0 +1,494 @@
+.text
+.globl bn_mul_mont
+.type bn_mul_mont,@function
+.align 16
+bn_mul_mont:
+.L_bn_mul_mont_begin:
+ #ifdef __CET__
+
+.byte 243,15,30,251
+ #endif
+
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %eax,%eax
+ movl 40(%esp),%edi
+ cmpl $4,%edi
+ jl .L000just_leave
+ leal 20(%esp),%esi
+ leal 24(%esp),%edx
+ addl $2,%edi
+ negl %edi
+ leal -32(%esp,%edi,4),%ebp
+ negl %edi
+ movl %ebp,%eax
+ subl %edx,%eax
+ andl $2047,%eax
+ subl %eax,%ebp
+ xorl %ebp,%edx
+ andl $2048,%edx
+ xorl $2048,%edx
+ subl %edx,%ebp
+ andl $-64,%ebp
+ movl %esp,%eax
+ subl %ebp,%eax
+ andl $-4096,%eax
+ movl %esp,%edx
+ leal (%ebp,%eax,1),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+ jmp .L002page_walk_done
+.align 16
+.L001page_walk:
+ leal -4096(%esp),%esp
+ movl (%esp),%eax
+ cmpl %ebp,%esp
+ ja .L001page_walk
+.L002page_walk_done:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebp
+ movl 16(%esi),%esi
+ movl (%esi),%esi
+ movl %eax,4(%esp)
+ movl %ebx,8(%esp)
+ movl %ecx,12(%esp)
+ movl %ebp,16(%esp)
+ movl %esi,20(%esp)
+ leal -3(%edi),%ebx
+ movl %edx,24(%esp)
+ leal OPENSSL_ia32cap_P,%eax
+ btl $26,(%eax)
+ jnc .L003non_sse2
+ movl $-1,%eax
+ movd %eax,%mm7
+ movl 8(%esp),%esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%ebp
+ xorl %edx,%edx
+ xorl %ecx,%ecx
+ movd (%edi),%mm4
+ movd (%esi),%mm5
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ movq %mm5,%mm2
+ movq %mm5,%mm0
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ incl %ecx
+.align 16
+.L0041st:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ leal 1(%ecx),%ecx
+ cmpl %ebx,%ecx
+ jl .L0041st
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm2,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ incl %edx
+.L005outer:
+ xorl %ecx,%ecx
+ movd (%edi,%edx,4),%mm4
+ movd (%esi),%mm5
+ movd 32(%esp),%mm6
+ movd (%ebp),%mm3
+ pmuludq %mm4,%mm5
+ paddq %mm6,%mm5
+ movq %mm5,%mm0
+ movq %mm5,%mm2
+ pand %mm7,%mm0
+ pmuludq 20(%esp),%mm5
+ pmuludq %mm5,%mm3
+ paddq %mm0,%mm3
+ movd 36(%esp),%mm6
+ movd 4(%ebp),%mm1
+ movd 4(%esi),%mm0
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ incl %ecx
+ decl %ebx
+.L006inner:
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ movd 36(%esp,%ecx,4),%mm6
+ pand %mm7,%mm0
+ movd 4(%ebp,%ecx,4),%mm1
+ paddq %mm0,%mm3
+ movd 4(%esi,%ecx,4),%mm0
+ psrlq $32,%mm2
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm3
+ paddq %mm6,%mm2
+ decl %ebx
+ leal 1(%ecx),%ecx
+ jnz .L006inner
+ movl %ecx,%ebx
+ pmuludq %mm4,%mm0
+ pmuludq %mm5,%mm1
+ paddq %mm0,%mm2
+ paddq %mm1,%mm3
+ movq %mm2,%mm0
+ pand %mm7,%mm0
+ paddq %mm0,%mm3
+ movd %mm3,28(%esp,%ecx,4)
+ psrlq $32,%mm2
+ psrlq $32,%mm3
+ movd 36(%esp,%ebx,4),%mm6
+ paddq %mm2,%mm3
+ paddq %mm6,%mm3
+ movq %mm3,32(%esp,%ebx,4)
+ leal 1(%edx),%edx
+ cmpl %ebx,%edx
+ jle .L005outer
+ emms
+ jmp .L007common_tail
+.align 16
+.L003non_sse2:
+ movl 8(%esp),%esi
+ leal 1(%ebx),%ebp
+ movl 12(%esp),%edi
+ xorl %ecx,%ecx
+ movl %esi,%edx
+ andl $1,%ebp
+ subl %edi,%edx
+ leal 4(%edi,%ebx,4),%eax
+ orl %edx,%ebp
+ movl (%edi),%edi
+ jz .L008bn_sqr_mont
+ movl %eax,28(%esp)
+ movl (%esi),%eax
+ xorl %edx,%edx
+.align 16
+.L009mull:
+ movl %edx,%ebp
+ mull %edi
+ addl %eax,%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ movl (%esi,%ecx,4),%eax
+ cmpl %ebx,%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L009mull
+ movl %edx,%ebp
+ mull %edi
+ movl 20(%esp),%edi
+ addl %ebp,%eax
+ movl 16(%esp),%esi
+ adcl $0,%edx
+ imull 32(%esp),%edi
+ movl %eax,32(%esp,%ebx,4)
+ xorl %ecx,%ecx
+ movl %edx,36(%esp,%ebx,4)
+ movl %ecx,40(%esp,%ebx,4)
+ movl (%esi),%eax
+ mull %edi
+ addl 32(%esp),%eax
+ movl 4(%esi),%eax
+ adcl $0,%edx
+ incl %ecx
+ jmp .L0102ndmadd
+.align 16
+.L0111stmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L0111stmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%eax
+ movl 20(%esp),%edi
+ adcl $0,%edx
+ movl 16(%esp),%esi
+ addl %eax,%ebp
+ adcl $0,%edx
+ imull 32(%esp),%edi
+ xorl %ecx,%ecx
+ addl 36(%esp,%ebx,4),%edx
+ movl %ebp,32(%esp,%ebx,4)
+ adcl $0,%ecx
+ movl (%esi),%eax
+ movl %edx,36(%esp,%ebx,4)
+ movl %ecx,40(%esp,%ebx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ movl 4(%esi),%eax
+ adcl $0,%edx
+ movl $1,%ecx
+.align 16
+.L0102ndmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,24(%esp,%ecx,4)
+ jl .L0102ndmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ebx,4)
+ xorl %eax,%eax
+ movl 12(%esp),%ecx
+ addl 36(%esp,%ebx,4),%edx
+ adcl 40(%esp,%ebx,4),%eax
+ leal 4(%ecx),%ecx
+ movl %edx,32(%esp,%ebx,4)
+ cmpl 28(%esp),%ecx
+ movl %eax,36(%esp,%ebx,4)
+ je .L007common_tail
+ movl (%ecx),%edi
+ movl 8(%esp),%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%ecx
+ xorl %edx,%edx
+ movl (%esi),%eax
+ jmp .L0111stmadd
+.align 16
+.L008bn_sqr_mont:
+ movl %ebx,(%esp)
+ movl %ecx,12(%esp)
+ movl %edi,%eax
+ mull %edi
+ movl %eax,32(%esp)
+ movl %edx,%ebx
+ shrl $1,%edx
+ andl $1,%ebx
+ incl %ecx
+.align 16
+.L012sqr:
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ leal 1(%ecx),%ecx
+ adcl $0,%edx
+ leal (%ebx,%eax,2),%ebp
+ shrl $31,%eax
+ cmpl (%esp),%ecx
+ movl %eax,%ebx
+ movl %ebp,28(%esp,%ecx,4)
+ jl .L012sqr
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ movl 20(%esp),%edi
+ adcl $0,%edx
+ movl 16(%esp),%esi
+ leal (%ebx,%eax,2),%ebp
+ imull 32(%esp),%edi
+ shrl $31,%eax
+ movl %ebp,32(%esp,%ecx,4)
+ leal (%eax,%edx,2),%ebp
+ movl (%esi),%eax
+ shrl $31,%edx
+ movl %ebp,36(%esp,%ecx,4)
+ movl %edx,40(%esp,%ecx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ movl %ecx,%ebx
+ adcl $0,%edx
+ movl 4(%esi),%eax
+ movl $1,%ecx
+.align 16
+.L0133rdmadd:
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ecx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl 4(%esi,%ecx,4),%eax
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ecx,4)
+ movl %edx,%ebp
+ mull %edi
+ addl 36(%esp,%ecx,4),%ebp
+ leal 2(%ecx),%ecx
+ adcl $0,%edx
+ addl %eax,%ebp
+ movl (%esi,%ecx,4),%eax
+ adcl $0,%edx
+ cmpl %ebx,%ecx
+ movl %ebp,24(%esp,%ecx,4)
+ jl .L0133rdmadd
+ movl %edx,%ebp
+ mull %edi
+ addl 32(%esp,%ebx,4),%ebp
+ adcl $0,%edx
+ addl %eax,%ebp
+ adcl $0,%edx
+ movl %ebp,28(%esp,%ebx,4)
+ movl 12(%esp),%ecx
+ xorl %eax,%eax
+ movl 8(%esp),%esi
+ addl 36(%esp,%ebx,4),%edx
+ adcl 40(%esp,%ebx,4),%eax
+ movl %edx,32(%esp,%ebx,4)
+ cmpl %ebx,%ecx
+ movl %eax,36(%esp,%ebx,4)
+ je .L007common_tail
+ movl 4(%esi,%ecx,4),%edi
+ leal 1(%ecx),%ecx
+ movl %edi,%eax
+ movl %ecx,12(%esp)
+ mull %edi
+ addl 32(%esp,%ecx,4),%eax
+ adcl $0,%edx
+ movl %eax,32(%esp,%ecx,4)
+ xorl %ebp,%ebp
+ cmpl %ebx,%ecx
+ leal 1(%ecx),%ecx
+ je .L014sqrlast
+ movl %edx,%ebx
+ shrl $1,%edx
+ andl $1,%ebx
+.align 16
+.L015sqradd:
+ movl (%esi,%ecx,4),%eax
+ movl %edx,%ebp
+ mull %edi
+ addl %ebp,%eax
+ leal (%eax,%eax,1),%ebp
+ adcl $0,%edx
+ shrl $31,%eax
+ addl 32(%esp,%ecx,4),%ebp
+ leal 1(%ecx),%ecx
+ adcl $0,%eax
+ addl %ebx,%ebp
+ adcl $0,%eax
+ cmpl (%esp),%ecx
+ movl %ebp,28(%esp,%ecx,4)
+ movl %eax,%ebx
+ jle .L015sqradd
+ movl %edx,%ebp
+ addl %edx,%edx
+ shrl $31,%ebp
+ addl %ebx,%edx
+ adcl $0,%ebp
+.L014sqrlast:
+ movl 20(%esp),%edi
+ movl 16(%esp),%esi
+ imull 32(%esp),%edi
+ addl 32(%esp,%ecx,4),%edx
+ movl (%esi),%eax
+ adcl $0,%ebp
+ movl %edx,32(%esp,%ecx,4)
+ movl %ebp,36(%esp,%ecx,4)
+ mull %edi
+ addl 32(%esp),%eax
+ leal -1(%ecx),%ebx
+ adcl $0,%edx
+ movl $1,%ecx
+ movl 4(%esi),%eax
+ jmp .L0133rdmadd
+.align 16
+.L007common_tail:
+ movl 16(%esp),%ebp
+ movl 4(%esp),%edi
+ leal 32(%esp),%esi
+ movl (%esi),%eax
+ movl %ebx,%ecx
+ xorl %edx,%edx
+.align 16
+.L016sub:
+ sbbl (%ebp,%edx,4),%eax
+ movl %eax,(%edi,%edx,4)
+ decl %ecx
+ movl 4(%esi,%edx,4),%eax
+ leal 1(%edx),%edx
+ jge .L016sub
+ sbbl $0,%eax
+ movl $-1,%edx
+ xorl %eax,%edx
+ jmp .L017copy
+.align 16
+.L017copy:
+ movl 32(%esp,%ebx,4),%esi
+ movl (%edi,%ebx,4),%ebp
+ movl %ecx,32(%esp,%ebx,4)
+ andl %eax,%esi
+ andl %edx,%ebp
+ orl %esi,%ebp
+ movl %ebp,(%edi,%ebx,4)
+ decl %ebx
+ jge .L017copy
+ movl 24(%esp),%esp
+ movl $1,%eax
+.L000just_leave:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size bn_mul_mont,.-.L_bn_mul_mont_begin
+.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
+.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+.byte 111,114,103,62,0
+.comm OPENSSL_ia32cap_P,16,4
+
+ .section ".note.gnu.property", "a"
+ .p2align 2
+ .long 1f - 0f
+ .long 4f - 1f
+ .long 5
+0:
+ .asciz "GNU"
+1:
+ .p2align 2
+ .long 0xc0000002
+ .long 3f - 2f
+2:
+ .long 3
+3:
+ .p2align 2
+4: