summaryrefslogtreecommitdiffstats
path: root/IntelFsp2Pkg/Include/SaveRestoreSseAvxNasm.inc
blob: 38c807a311cb3a8811cab1f5acfd4ffd731bfd01 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
;------------------------------------------------------------------------------
;
; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Abstract:
;
;   Provide macro for register save/restore using SSE registers
;
;------------------------------------------------------------------------------

;
; Define SSE and AVX instruction set
;
;
; Define SSE macros using SSE 4.1 instructions
;
; Store a 64-bit general purpose register into one qword of an XMM register;
; the other qword of the XMM register is left unchanged.
; args 1:XMM, 2:IDX (0 - bits 0:63, 1 - bits 64:127), 3:REG
; An XMM register holds two qwords, so only bit 0 of IDX selects the lane.
;
%macro SXMMN        3
            pinsrq  %1, %3, ((%2) & 1)
            %endmacro

;
; Load a 64-bit general purpose register from one qword of an XMM register.
; The XMM register is not modified.
; args 1:XMM, 2:REG, 3:IDX (0 - bits 0:63, 1 - bits 64:127)
; An XMM register holds two qwords, so only bit 0 of IDX selects the lane.
;
%macro LXMMN        3
            pextrq  %2, %1, ((%3) & 1)
            %endmacro

;
; Define AVX macros using AVX instructions
;
; Save XMM to YMM: copy XMM register %3 into the 128-bit half of YMM register
; %1 selected by IDX. %1 is used as both source and destination, so its other
; half keeps its previous contents.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
;
%macro SYMMN        3
            vinsertf128  %1, %1, %3, %2
            %endmacro

;
; Restore XMM from YMM: copy the 128-bit half of YMM register %1 selected by
; IDX into XMM register %2. %1 is not modified.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
; NOTE(review): as a VEX-encoded write to an XMM register this presumably also
; clears bits 128:255 of the YMM register containing %2 - confirm against the
; Intel SDM before keeping data in the upper half of the scratch register.
;
%macro LYMMN        3
            vextractf128  %2, %1, %3
            %endmacro

;
; Save RBP, RBX, RSI, RDI and RSP in the upper halves of YMM7, YMM8 and YMM6:
;   YMM7[128:191] = RBP    YMM7[192:255] = RBX
;   YMM8[128:191] = RSI    YMM8[192:255] = RDI
;   YMM6[128:191] = RSP    (stored by SAVE_RSP)
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS    0
            ; RSI/RDI pair goes to the upper half of YMM8
            SXMMN   xmm5, 1, rdi
            SXMMN   xmm5, 0, rsi
            SYMMN   ymm8, 1, xmm5
            ; RBP/RBX pair goes to the upper half of YMM7
            SXMMN   xmm5, 1, rbx
            SXMMN   xmm5, 0, rbp
            SYMMN   ymm7, 1, xmm5
            ; SAVE_RSP reloads XMM5 from YMM6 before inserting RSP
            SAVE_RSP
            %endmacro

;
; Restore the registers stored by SAVE_REGS:
;   RBP = YMM7[128:191]    RBX = YMM7[192:255]
;   RSI = YMM8[128:191]    RDI = YMM8[192:255]
;   RSP = YMM6[128:191]    (loaded by LOAD_RSP)
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS    0
            ; Upper half of YMM8 holds the RSI/RDI pair
            LYMMN   ymm8, xmm5, 1
            LXMMN   xmm5, rdi, 1
            LXMMN   xmm5, rsi, 0
            ; Upper half of YMM7 holds the RBP/RBX pair
            LYMMN   ymm7, xmm5, 1
            LXMMN   xmm5, rbx, 1
            LXMMN   xmm5, rbp, 0
            ; LOAD_RSP reloads XMM5 from YMM6 and leaves it there
            LOAD_RSP
            %endmacro
;
; Restore RBP from YMM7[128:191]
; This is the slot SAVE_REGS stores RBP in. CALL_YMM stores its return address
; in the same slot, so the value read here is whatever was written last.
; Modified: XMM5 and RBP
;
%macro LOAD_RBP     0
            ; XMM5 = upper half of YMM7
            LYMMN   ymm7, xmm5, 1
            ; RBP = low qword of XMM5
            movq    rbp,  xmm5
            %endmacro

;
; Restore RBX from YMM7[192:255] (the slot SAVE_REGS stores RBX in)
; Modified: XMM5 and RBX
;
%macro LOAD_RBX     0
            ; XMM5 = upper half of YMM7
            LYMMN   ymm7, xmm5, 1
            ; RBX = high qword of XMM5
            LXMMN   xmm5, rbx, 1
            %endmacro

;
; Upper half of YMM6 to save/restore Time Stamp, RSP
;
;
; Save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; The upper half of YMM6 is read, patched and written back, so the RSP slot in
; YMM6[128:191] keeps its value.
; Modified: XMM5 and YMM6
;
%macro SAVE_TS      1
            ; XMM5 = current upper half of YMM6
            LYMMN   ymm6, xmm5, 1
            ; Replace only the high qword with the time stamp
            SXMMN   xmm5, 1, %1
            ; Write the patched value back to the upper half of YMM6
            SYMMN   ymm6, 1, xmm5
            %endmacro

;
; Restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register where to save time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS      1
            ; XMM5 = upper half of YMM6
            LYMMN   ymm6, xmm5, 1
            ; %1 = high qword of XMM5
            LXMMN   xmm5, %1, 1
            %endmacro

;
; Save RSP to YMM6[128:191]
; The upper half of YMM6 is read, patched and written back, so the time stamp
; slot in YMM6[192:255] keeps its value.
; Modified: XMM5 and YMM6
;
%macro SAVE_RSP     0
            ; XMM5 = current upper half of YMM6
            LYMMN   ymm6, xmm5, 1
            ; Replace only the low qword with RSP
            SXMMN   xmm5, 0, rsp
            ; Write the patched value back to the upper half of YMM6
            SYMMN   ymm6, 1, xmm5
            %endmacro

;
; Restore RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP     0
            ; XMM5 = upper half of YMM6
            LYMMN   ymm6, xmm5, 1
            ; RSP = low qword of XMM5
            movq    rsp,  xmm5
            %endmacro

;
; Upper half of YMM9 to save/restore UCODE status, BFV address
;
;
; Save uCode status to YMM9[128:191]
; arg 1:general purpose register which holds uCode status
; The upper half of YMM9 is read, patched and written back, so the BFV address
; slot in YMM9[192:255] keeps its value.
; Modified: XMM5 and YMM9
;
%macro SAVE_UCODE_STATUS     1
            ; XMM5 = current upper half of YMM9
            LYMMN   ymm9, xmm5, 1
            ; Replace only the low qword (index 0) with the uCode status
            SXMMN   xmm5, 0, %1
            ; Write the patched value back to the upper half of YMM9
            SYMMN   ymm9, 1, xmm5
            %endmacro

;
; Restore uCode status from YMM9[128:191] (the slot SAVE_UCODE_STATUS writes)
; arg 1:general purpose register where to save uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS     1
            ; XMM5 = upper half of YMM9
            LYMMN   ymm9, xmm5, 1
            ; %1 = low qword of XMM5
            movq    %1,  xmm5
            %endmacro

;
; Save BFV address to YMM9[192:255]
; arg 1:general purpose register which holds BFV address
; The upper half of YMM9 is read, patched and written back, so the uCode status
; slot in YMM9[128:191] keeps its value.
; Modified: XMM5 and YMM9
;
%macro SAVE_BFV     1
            ; XMM5 = current upper half of YMM9
            LYMMN   ymm9, xmm5, 1
            ; Replace only the high qword (index 1) with the BFV address
            SXMMN   xmm5, 1, %1
            ; Write the patched value back to the upper half of YMM9
            SYMMN   ymm9, 1, xmm5
            %endmacro

;
; Restore BFV address from YMM9[192:255] (the slot SAVE_BFV writes)
; arg 1:general purpose register where to save BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV     1
            ; XMM5 = upper half of YMM9
            LYMMN   ymm9, xmm5, 1
            ; %1 = high qword of XMM5
            LXMMN   xmm5, %1, 1
            %endmacro

;
; Upper half of YMM10 to save/restore RCX
;
;
; Save RCX to YMM10[128:191]
; The upper half of YMM10 is read, patched and written back, so YMM10[192:255]
; keeps its value.
; Modified: XMM5 and YMM10
;

%macro SAVE_RCX     0
            ; XMM5 = current upper half of YMM10
            LYMMN   ymm10, xmm5, 1
            ; Replace only the low qword with RCX
            SXMMN   xmm5, 0, rcx
            ; Write the patched value back to the upper half of YMM10
            SYMMN   ymm10, 1, xmm5
            %endmacro

;
; Restore RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;

%macro LOAD_RCX     0
            ; XMM5 = upper half of YMM10
            LYMMN   ymm10, xmm5, 1
            ; RCX = low qword of XMM5
            movq    rcx,  xmm5
            %endmacro

;
; Stack-less call: record the address of the instruction following this macro
; in YMM7[128:191], then jump to the entry point. The callee comes back with
; RET_YMM, which jumps to the address held in that slot.
; arg 1:Entry
; NOTE: SAVE_REGS keeps RBP in YMM7[128:191] as well, so this macro overwrites
;       an RBP value saved there. YMM7[192:255] is preserved.
; Modified: RSI, XMM5, YMM7
;
%macro CALL_YMM     1
            ; XMM5 = current upper half of YMM7
            LYMMN   ymm7, xmm5, 1
            ; Replace only the low qword with the resume address
            mov     rsi, %%ResumeHere
            SXMMN   xmm5, 0, rsi
            SYMMN   ymm7, 1, xmm5
            ; Transfer control to the entry point
            mov     rsi,  %1
            jmp     rsi
%%ResumeHere:
            %endmacro
;
; Restore RIP from YMM7[128:191]
; Jumps to the address held in that slot, which is where CALL_YMM stores the
; address of the instruction following its expansion.
; Modified: RSI, XMM5
;
%macro RET_YMM      0
            ; XMM5 = upper half of YMM7
            LYMMN   ymm7, xmm5, 1
            ; RSI = low qword of XMM5 (the saved return address)
            movq    rsi, xmm5
            jmp     rsi
            %endmacro

;
; Initialize the x87 FPU and enable SSE.
; Spins forever at the error label if CPUID does not report both SSE
; (CPUID.01H:EDX[25]) and SSE 4.1 (CPUID.01H:ECX[19]).
; All labels are macro-local (%%) so that expanding the macro does not add
; global names to, or change the local-label scope of, the including file.
; Modified: RAX, RBX, RDX, R10, CR4, x87 control word, MXCSR and flags.
;           RCX is preserved (kept in R10 across CPUID).
;
%macro ENABLE_SSE   0
            ;
            ; Initialize floating point units
            ;
            ; Jump over the control-word constants embedded in the code stream
            jmp     %%NextAddress
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:    DW      027Fh
            ;
            ; Multimedia-extensions control word (MXCSR):
            ; all exceptions masked, round-to-nearest, flush-to-zero disabled
            ;
%%MmxControlWord:    DQ      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            mov rax, %%FpuControlWord
            fldcw [rax]

            ;
            ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instruction.
            ; CPUID overwrites RCX, so keep the caller's value in R10.
            ;
            mov     r10, rcx
            mov     rax, 1
            cpuid
            bt      rdx, 25
            jnc     %%SseError

            ;
            ; SSE 4.1 support (CPUID.01H:ECX[bit 19]); pinsrq/pextrq need it
            ;
            bt      ecx, 19
            jnc     %%SseError
            mov     rcx,  r10

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     rax, cr4
            or      rax, 00000600h
            mov     cr4, rax

            ;
            ; The processor should support SSE instruction and we can use
            ; ldmxcsr instruction
            ;
            mov  rax, %%MmxControlWord
            ldmxcsr [rax]
            %endmacro

;
; Enable AVX state so that YMM registers can be used.
; Spins forever at the error label if CPUID.01H:ECX[28] (AVX) is not set.
; All labels are macro-local (%%) so that expanding the macro does not add
; global names to, or change the local-label scope of, the including file.
; Modified: RAX, RBX, RDX, R10, CR4, XCR0 and flags.
;           RCX is preserved (kept in R10 across CPUID and XGETBV/XSETBV).
;
%macro ENABLE_AVX   0
            ; CPUID overwrites RCX, so keep the caller's value in R10
            mov     r10, rcx
            mov     eax, 1
            cpuid
            and     ecx, 10000000h
            cmp     ecx, 10000000h ; check AVX feature flag
            je      %%EnableAvx
%%AvxError:
            ;
            ; Processor has to support AVX
            ;
            jmp     %%AvxError
%%EnableAvx:
            ;
            ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
            ;
            mov     rax, cr4
            or      rax, 00040000h
            mov     cr4, rax

            mov     rcx, 0         ; index 0
            xgetbv                 ; result in edx:eax
            or      eax, 00000006h ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
            xsetbv
            mov     rcx, r10
            %endmacro