; Source: CryptoPkg/Library/OpensslLib/OpensslGen/X64-MSFT/crypto/bn/x86_64-gf2m.nasm
; Blob:   d4367ed02c07af174a035d1afa3a2ae416735b27
; NASM preamble.
;  - `default rel` makes plain [symbol] memory operands RIP-relative.
;  - XMMWORD/YMMWORD/ZMMWORD are defined as empty so that operands written as
;    e.g. XMMWORD[rcx] reduce to [rcx] when assembled.
;  - All code below is placed in a 64-byte aligned .text section.
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64



;-----------------------------------------------------------------------
; _mul_1x1 -- file-local helper: 64x64 -> 128-bit carry-less (XOR instead
;             of ADD) multiplication.
; Calling convention is private to this file (not the Win64 ABI):
;   In:    rax = a, rbp = b, r8 = nibble mask (the caller in this file
;          passes 0xf)
;   Out:   rdx:rax = product (rdx = high 64 bits, rax = low 64 bits)
;   Clobb: rbx, rcx, rsi, rdi, rbp, r9-r14, xmm0, xmm1, flags
;          (r8 is only read)
;   Stack: reserves 128+8 bytes; [rsp .. rsp+127] holds a 16-entry qword
;          table tab[i] = carry-less product of (a with top 3 bits
;          cleared) and the 4-bit value i.
; Method: the top three bits of a are handled separately up front; then b
; is consumed one nibble at a time, each nibble indexing tab[]. Odd
; nibbles are accumulated in rdx:rax with integer shifts, even nibbles in
; xmm0 with byte shifts, and both halves are XORed together at the end.
; NOTE: the prologue below is described byte-for-byte by $L$SEH_info_1x1
; in .xdata; do not change it without updating the unwind codes.
;-----------------------------------------------------------------------
ALIGN   16
_mul_1x1:

        sub     rsp,128+8               ; scratch frame for tab[0..15]

        mov     r9,-1
        lea     rsi,[rax*1+rax]         ; rsi = a<<1 (bit 62 of a now in the sign bit)
        shr     r9,3                    ; r9 = 0x1fffffffffffffff
        lea     rdi,[rax*4]             ; rdi = a<<2 (bit 61 of a now in the sign bit)
        and     r9,rax                  ; a1 = a with its top three bits cleared
        lea     r12,[rax*8]             ; a8 = a<<3 (top three bits shifted out)
        sar     rax,63                  ; broadcast bit 63 of a to all 64 bits
        lea     r10,[r9*1+r9]           ; a2 = a1<<1
        sar     rsi,63                  ; broadcast bit 62 of a
        lea     r11,[r9*4]              ; a4 = a1<<2
        and     rax,rbp                 ; b if bit 63 of a is set, else 0
        sar     rdi,63                  ; broadcast bit 61 of a
        mov     rdx,rax
        shl     rax,63                  ; lo  = (bit63 ? b : 0) << 63
        and     rsi,rbp                 ; b if bit 62 of a is set, else 0
        shr     rdx,1                   ; hi  = (bit63 ? b : 0) >> 1
        mov     rcx,rsi
        shl     rsi,62
        and     rdi,rbp                 ; b if bit 61 of a is set, else 0
        shr     rcx,2
        xor     rax,rsi                 ; lo ^= (bit62 ? b : 0) << 62
        mov     rbx,rdi
        shl     rdi,61
        xor     rdx,rcx                 ; hi ^= (bit62 ? b : 0) >> 2
        shr     rbx,3
        xor     rax,rdi                 ; lo ^= (bit61 ? b : 0) << 61
        xor     rdx,rbx                 ; hi ^= (bit61 ? b : 0) >> 3

        ; Build tab[]; r13 and r14 carry running XOR combinations.
        mov     r13,r9
        mov     QWORD[rsp],0            ; tab[0]  = 0
        xor     r13,r10                 ; r13 = a1^a2
        mov     QWORD[8+rsp],r9         ; tab[1]  = a1
        mov     r14,r11
        mov     QWORD[16+rsp],r10       ; tab[2]  = a2
        xor     r14,r12                 ; r14 = a4^a8
        mov     QWORD[24+rsp],r13       ; tab[3]  = a1^a2

        xor     r9,r11
        mov     QWORD[32+rsp],r11       ; tab[4]  = a4
        xor     r10,r11
        mov     QWORD[40+rsp],r9        ; tab[5]  = a1^a4
        xor     r13,r11
        mov     QWORD[48+rsp],r10       ; tab[6]  = a2^a4
        xor     r9,r14                  ; (a1^a4)^(a4^a8) = a1^a8
        mov     QWORD[56+rsp],r13       ; tab[7]  = a1^a2^a4
        xor     r10,r14                 ; (a2^a4)^(a4^a8) = a2^a8

        mov     QWORD[64+rsp],r12       ; tab[8]  = a8
        xor     r13,r14                 ; a1^a2^a8
        mov     QWORD[72+rsp],r9        ; tab[9]  = a1^a8
        xor     r9,r11                  ; a1^a4^a8
        mov     QWORD[80+rsp],r10       ; tab[10] = a2^a8
        xor     r10,r11                 ; a2^a4^a8
        mov     QWORD[88+rsp],r13       ; tab[11] = a1^a2^a8

        xor     r13,r11                 ; a1^a2^a4^a8
        mov     QWORD[96+rsp],r14       ; tab[12] = a4^a8
        mov     rsi,r8
        mov     QWORD[104+rsp],r9       ; tab[13] = a1^a4^a8
        and     rsi,rbp                 ; rsi = nibble 0 of b
        mov     QWORD[112+rsp],r10      ; tab[14] = a2^a4^a8
        shr     rbp,4
        mov     QWORD[120+rsp],r13      ; tab[15] = a1^a2^a4^a8
        mov     rdi,r8
        and     rdi,rbp                 ; rdi = nibble 1 of b
        shr     rbp,4

        ; From here on rsi indexes tab[] for even nibbles (accumulated in
        ; xmm0) and rdi for odd nibbles (accumulated in rdx:rax).
        movq    xmm0,QWORD[rsi*8+rsp]   ; xmm0 = tab[nibble 0]
        mov     rsi,r8
        and     rsi,rbp                 ; rsi = nibble 2
        shr     rbp,4
        ; nibble 1 -> rdx:rax shifted by 4 bits; nibble 2 -> xmm0 shifted by 1 byte
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,4
        and     rdi,rbp                 ; rdi = nibble 3
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,60
        xor     rax,rcx
        pslldq  xmm1,1
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 4
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 3 -> rdx:rax shifted by 12 bits; nibble 4 -> xmm0 shifted by 2 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,12
        and     rdi,rbp                 ; rdi = nibble 5
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,52
        xor     rax,rcx
        pslldq  xmm1,2
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 6
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 5 -> rdx:rax shifted by 20 bits; nibble 6 -> xmm0 shifted by 3 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,20
        and     rdi,rbp                 ; rdi = nibble 7
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,44
        xor     rax,rcx
        pslldq  xmm1,3
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 8
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 7 -> rdx:rax shifted by 28 bits; nibble 8 -> xmm0 shifted by 4 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,28
        and     rdi,rbp                 ; rdi = nibble 9
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,36
        xor     rax,rcx
        pslldq  xmm1,4
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 10
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 9 -> rdx:rax shifted by 36 bits; nibble 10 -> xmm0 shifted by 5 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,36
        and     rdi,rbp                 ; rdi = nibble 11
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,28
        xor     rax,rcx
        pslldq  xmm1,5
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 12
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 11 -> rdx:rax shifted by 44 bits; nibble 12 -> xmm0 shifted by 6 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,44
        and     rdi,rbp                 ; rdi = nibble 13
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,20
        xor     rax,rcx
        pslldq  xmm1,6
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; rsi = nibble 14
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 13 -> rdx:rax shifted by 52 bits; nibble 14 -> xmm0 shifted by 7 bytes
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rdi,r8
        mov     rbx,rcx
        shl     rcx,52
        and     rdi,rbp                 ; rdi = nibble 15
        movq    xmm1,QWORD[rsi*8+rsp]
        shr     rbx,12
        xor     rax,rcx
        pslldq  xmm1,7
        mov     rsi,r8
        shr     rbp,4
        xor     rdx,rbx
        and     rsi,rbp                 ; b is exhausted here; rsi is overwritten below
        shr     rbp,4
        pxor    xmm0,xmm1
        ; nibble 15 -> rdx:rax shifted by 60 bits, then fold in the xmm0 half
        mov     rcx,QWORD[rdi*8+rsp]
        mov     rbx,rcx
        shl     rcx,60
DB      102,72,15,126,198               ; movq rsi,xmm0  (low 64 bits of the SSE accumulator)
        shr     rbx,4
        xor     rax,rcx
        psrldq  xmm0,8                  ; bring the high 64 bits down
        xor     rdx,rbx
DB      102,72,15,126,199               ; movq rdi,xmm0  (high 64 bits of the SSE accumulator)
        xor     rax,rsi                 ; lo ^= SSE low half
        xor     rdx,rdi                 ; hi ^= SSE high half

        add     rsp,128+8               ; release the table frame

        DB      0F3h,0C3h               ;repret
$L$end_mul_1x1:


;-----------------------------------------------------------------------
; bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiplication using
;                    three 64x64 products (Karatsuba-style combination).
; ABI:   Microsoft x64, as used by the code below
; In:    rcx       = pointer to 4 output qwords (r[0..3], r[0] lowest)
;        rdx       = a1 (high qword of a)
;        r8        = a0 (low qword of a)
;        r9        = b1 (high qword of b)
;        [rsp+40]  = b0 (low qword of b; fifth argument, read from the stack)
; Out:   r[0..3] written through rcx; no value is returned in rax.
; Paths: if bit 33 of the qword at OPENSSL_ia32cap_P is set, the product is
;        computed with PCLMULQDQ in xmm0-xmm5 and no stack frame;
;        otherwise $L$vanilla_mul_2x2 builds a 136-byte frame and calls
;        _mul_1x1 three times.
; Vanilla frame layout (offsets from rsp after the lea):
;        [0..15]  a0*b0      [16..31] a1*b1      [32] r pointer
;        [40] a1  [48] a0    [56] b1  [64] b0
;        [80] r14 [88] r13   [96] r12 [104] rbp  [112] rbx
;        [120] rdi [128] rsi
; NOTE: se_handler and .pdata/.xdata depend on the labels
;        $L$vanilla_mul_2x2, $L$body_mul_2x2, $L$epilogue_mul_2x2 and
;        $L$end_mul_2x2 and on the save-slot offsets above.
;-----------------------------------------------------------------------
EXTERN  OPENSSL_ia32cap_P
global  bn_GF2m_mul_2x2

ALIGN   16
bn_GF2m_mul_2x2:

        mov     rax,rsp                 ; entry rsp; se_handler uses context->Rax as the frame
                                        ; pointer for faults before $L$body_mul_2x2
        mov     r10,QWORD[OPENSSL_ia32cap_P]
        bt      r10,33                  ; CF = bit 33 (bit 1 of the second dword); gates the
                                        ; PCLMULQDQ path -- NOTE(review): flag meaning comes
                                        ; from OpenSSL's capability vector, confirm there
        jnc     NEAR $L$vanilla_mul_2x2

DB      102,72,15,110,194               ; movq xmm0,rdx   (a1)
DB      102,73,15,110,201               ; movq xmm1,r9    (b1)
DB      102,73,15,110,208               ; movq xmm2,r8    (a0)
        movq    xmm3,QWORD[40+rsp]      ; b0, fifth argument on the stack
        movdqa  xmm4,xmm0
        movdqa  xmm5,xmm1
DB      102,15,58,68,193,0              ; pclmulqdq xmm0,xmm1,0  -> a1*b1
        pxor    xmm4,xmm2               ; a1^a0
        pxor    xmm5,xmm3               ; b1^b0
DB      102,15,58,68,211,0              ; pclmulqdq xmm2,xmm3,0  -> a0*b0
DB      102,15,58,68,229,0              ; pclmulqdq xmm4,xmm5,0  -> (a0^a1)*(b0^b1)
        xorps   xmm4,xmm0
        xorps   xmm4,xmm2               ; middle term = (a0^a1)*(b0^b1) ^ a0*b0 ^ a1*b1
        movdqa  xmm5,xmm4
        pslldq  xmm4,8                  ; low half of the middle term, moved up 64 bits
        psrldq  xmm5,8                  ; high half of the middle term, moved down 64 bits
        pxor    xmm2,xmm4               ; r[1:0]
        pxor    xmm0,xmm5               ; r[3:2]
        movdqu  XMMWORD[rcx],xmm2
        movdqu  XMMWORD[16+rcx],xmm0
        DB      0F3h,0C3h               ;repret

ALIGN   16
$L$vanilla_mul_2x2:
        lea     rsp,[((-136))+rsp]      ; allocate the 136-byte frame

        mov     r10,QWORD[176+rsp]      ; b0: 136 (frame) + 40 (entry offset of the fifth argument)
        mov     QWORD[120+rsp],rdi      ; save the registers that _mul_1x1 overwrites
        mov     QWORD[128+rsp],rsi
        mov     QWORD[80+rsp],r14

        mov     QWORD[88+rsp],r13

        mov     QWORD[96+rsp],r12

        mov     QWORD[104+rsp],rbp

        mov     QWORD[112+rsp],rbx

$L$body_mul_2x2:
        mov     QWORD[32+rsp],rcx       ; stash the arguments: r pointer
        mov     QWORD[40+rsp],rdx       ; a1
        mov     QWORD[48+rsp],r8        ; a0
        mov     QWORD[56+rsp],r9        ; b1
        mov     QWORD[64+rsp],r10       ; b0

        mov     r8,0xf                  ; nibble mask for _mul_1x1 (r8 is not modified by it)
        mov     rax,rdx
        mov     rbp,r9
        call    _mul_1x1                ; rdx:rax = a1*b1
        mov     QWORD[16+rsp],rax
        mov     QWORD[24+rsp],rdx

        mov     rax,QWORD[48+rsp]
        mov     rbp,QWORD[64+rsp]
        call    _mul_1x1                ; rdx:rax = a0*b0
        mov     QWORD[rsp],rax
        mov     QWORD[8+rsp],rdx

        mov     rax,QWORD[40+rsp]
        mov     rbp,QWORD[56+rsp]
        xor     rax,QWORD[48+rsp]       ; a1^a0
        xor     rbp,QWORD[64+rsp]       ; b1^b0
        call    _mul_1x1                ; rdx:rax = (a0^a1)*(b0^b1)
        mov     rbx,QWORD[rsp]          ; rbx = lo(a0*b0)
        mov     rcx,QWORD[8+rsp]        ; rcx = hi(a0*b0)
        mov     rdi,QWORD[16+rsp]       ; rdi = lo(a1*b1)
        mov     rsi,QWORD[24+rsp]       ; rsi = hi(a1*b1)
        mov     rbp,QWORD[32+rsp]       ; rbp = r pointer

        ; Combine: r[0] = lo(a0b0), r[3] = hi(a1b1),
        ;          r[2] = hi(mid) ^ hi(a0b0) ^ lo(a1b1) ^ hi(a1b1),
        ;          r[1] = lo(mid) ^ lo(a0b0) ^ hi(a0b0) ^ lo(a1b1),
        ; where mid = (a0^a1)*(b0^b1) as returned in rdx:rax.
        xor     rax,rdx
        xor     rdx,rcx
        xor     rax,rbx
        mov     QWORD[rbp],rbx          ; r[0]
        xor     rdx,rdi
        mov     QWORD[24+rbp],rsi       ; r[3]
        xor     rax,rsi
        xor     rdx,rsi
        xor     rax,rdx
        mov     QWORD[16+rbp],rdx       ; r[2]
        mov     QWORD[8+rbp],rax        ; r[1]

        mov     r14,QWORD[80+rsp]       ; restore the saved registers

        mov     r13,QWORD[88+rsp]

        mov     r12,QWORD[96+rsp]

        mov     rbp,QWORD[104+rsp]

        mov     rbx,QWORD[112+rsp]

        mov     rdi,QWORD[120+rsp]
        mov     rsi,QWORD[128+rsp]
        lea     rsp,[136+rsp]           ; release the frame

$L$epilogue_mul_2x2:
        DB      0F3h,0C3h               ;repret
$L$end_mul_2x2:


; NUL-terminated ASCII identification string embedded in .text:
; "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB      71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
DB      99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
DB      52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
DB      32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
DB      111,114,103,62,0
ALIGN   16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by
;               $L$SEH_info_2x2 for the $L$vanilla_mul_2x2 code range.
; In (as read by the code): r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
;        rcx and rdx are not read.
; Out:   eax = 1 (ExceptionContinueSearch).
; What it does: reconstructs the caller's rsp and non-volatile registers
;        in the CONTEXT record as if the bn_GF2m_mul_2x2 epilogue had run,
;        copies that CONTEXT into the dispatcher's ContextRecord, then
;        calls RtlVirtualUnwind to continue unwinding into the caller.
; The numeric offsets are fields of the Windows x64 CONTEXT and
; DISPATCHER_CONTEXT structures, and the 80..128 / 136 values mirror the
; frame layout of $L$vanilla_mul_2x2.
;-----------------------------------------------------------------------
EXTERN  __imp_RtlVirtualUnwind


ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; 32-byte shadow space + 4 stack arguments for the call below

        mov     rax,QWORD[120+r8]       ; context->Rax (entry rsp saved by `mov rax,rsp`)
        mov     rbx,QWORD[248+r8]       ; context->Rip

        lea     r10,[$L$body_mul_2x2]
        cmp     rbx,r10                 ; fault before the body label: use Rax as the frame
        jb      NEAR $L$in_prologue

        mov     rax,QWORD[152+r8]       ; context->Rsp

        lea     r10,[$L$epilogue_mul_2x2]
        cmp     rbx,r10                 ; fault at/after the epilogue label: frame already
        jae     NEAR $L$in_prologue     ; released, Rsp is used unchanged

        mov     r14,QWORD[80+rax]       ; reload the registers from the function's save slots
        mov     r13,QWORD[88+rax]
        mov     r12,QWORD[96+rax]
        mov     rbp,QWORD[104+rax]
        mov     rbx,QWORD[112+rax]
        mov     rdi,QWORD[120+rax]
        mov     rsi,QWORD[128+rax]

        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14

        lea     rax,[136+rax]           ; undo the 136-byte frame allocation

$L$in_prologue:
        mov     QWORD[152+r8],rax       ; context->Rsp = rsp as it was at function entry

        mov     rdi,QWORD[40+r9]        ; destination: disp->ContextRecord
        mov     rsi,r8                  ; source: context
        mov     ecx,154                 ; 154 qwords = 1232 bytes (sizeof(CONTEXT))
        DD      0xa548f3fc              ; bytes FC F3 48 A5 = cld; rep movsq

        mov     rsi,r9
        xor     rcx,rcx                 ; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
        mov     rdx,QWORD[8+rsi]        ; arg2: disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3: disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4: disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5
        mov     QWORD[40+rsp],r11       ; arg6
        mov     QWORD[48+rsp],r12       ; arg7
        mov     QWORD[56+rsp],rcx       ; arg8: NULL
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret


; Win64 unwind tables.
; .pdata: one entry per covered code range, each three image-relative
; addresses: {begin, end, unwind info}.
section .pdata rdata align=4
ALIGN   4
        ; _mul_1x1 .. $L$end_mul_1x1, described by static unwind codes
        DD      _mul_1x1 wrt ..imagebase
        DD      $L$end_mul_1x1 wrt ..imagebase
        DD      $L$SEH_info_1x1 wrt ..imagebase

        ; $L$vanilla_mul_2x2 .. $L$end_mul_2x2, handled by se_handler.
        ; The PCLMULQDQ path before $L$vanilla_mul_2x2 has no entry here.
        DD      $L$vanilla_mul_2x2 wrt ..imagebase
        DD      $L$end_mul_2x2 wrt ..imagebase
        DD      $L$SEH_info_2x2 wrt ..imagebase
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_1x1:
; UNWIND_INFO: version 1, no flags; prologue size 7 bytes; 2 unwind-code slots
DB      0x01,0x07,0x02,0x00
; code at prologue offset 7: large stack allocation of 0x11*8 = 136 bytes
; (matches `sub rsp,128+8` in _mul_1x1)
DB      0x07,0x01,0x11,0x00
$L$SEH_info_2x2:
; UNWIND_INFO: version 1 with the exception-handler flag (0x09), no unwind
; codes; followed by the image-relative address of the handler
DB      9,0,0,0
        DD      se_handler wrt ..imagebase