1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
;------------------------------------------------------------------------------ ;
; Copyright (c) 2012 - 2022, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
;
; ExceptionHandlerAsm.Asm
;
; Abstract:
;
; x64 CPU Exception Handler
;
; Notes:
;
;------------------------------------------------------------------------------
%include "Nasm.inc"
;
; Equivalent NASM structure of IA32_DESCRIPTOR
;
struc IA32_DESCRIPTOR
.Limit CTYPE_UINT16 1
.Base CTYPE_UINTN 1
endstruc
;
; Equivalent NASM structure of IA32_IDT_GATE_DESCRIPTOR
;
struc IA32_IDT_GATE_DESCRIPTOR
.OffsetLow CTYPE_UINT16 1
.Selector CTYPE_UINT16 1
.Reserved_0 CTYPE_UINT8 1
.GateType CTYPE_UINT8 1
.OffsetHigh CTYPE_UINT16 1
.OffsetUpper CTYPE_UINT32 1
.Reserved_1 CTYPE_UINT32 1
endstruc
;
; CommonExceptionHandler()
;
%define VC_EXCEPTION 29
extern ASM_PFX(mErrorCodeFlag) ; Error code flags for exceptions
extern ASM_PFX(mDoFarReturnFlag) ; Do far return flag
extern ASM_PFX(CommonExceptionHandler)
SECTION .data
DEFAULT REL
SECTION .text
ALIGN 8
; Generate 256 IDT vectors.
AsmIdtVectorBegin:
%assign Vector 0
%rep 256
push strict dword %[Vector] ; This instruction pushes sign-extended 8-byte value on stack
push rax
%ifdef NO_ABSOLUTE_RELOCS_IN_TEXT
mov rax, strict qword 0 ; mov rax, ASM_PFX(CommonInterruptEntry)
%else
mov rax, ASM_PFX(CommonInterruptEntry)
%endif
jmp rax
%assign Vector Vector+1
%endrep
AsmIdtVectorEnd:
HookAfterStubHeaderBegin:
push strict dword 0 ; 0 will be fixed
VectorNum:
push rax
%ifdef NO_ABSOLUTE_RELOCS_IN_TEXT
mov rax, strict qword 0 ; mov rax, HookAfterStubHeaderEnd
JmpAbsoluteAddress:
%else
mov rax, HookAfterStubHeaderEnd
%endif
jmp rax
HookAfterStubHeaderEnd:
mov rax, rsp
and sp, 0xfff0 ; make sure 16-byte aligned for exception context
sub rsp, 0x18 ; reserve room for filling exception data later
push rcx
mov rcx, [rax + 8]
bt [ASM_PFX(mErrorCodeFlag)], ecx
jnc .0
push qword [rsp] ; push additional rcx to make stack alignment
.0:
xchg rcx, [rsp] ; restore rcx, save Exception Number in stack
push qword [rax] ; push rax into stack to keep code consistence
;---------------------------------------;
; CommonInterruptEntry ;
;---------------------------------------;
; The follow algorithm is used for the common interrupt routine.
; Entry from each interrupt with a push eax and eax=interrupt number
; Stack frame would be as follows as specified in IA32 manuals:
;
; +---------------------+ <-- 16-byte aligned ensured by processor
; + Old SS +
; +---------------------+
; + Old RSP +
; +---------------------+
; + RFlags +
; +---------------------+
; + CS +
; +---------------------+
; + RIP +
; +---------------------+
; + Error Code +
; +---------------------+
; + Vector Number +
; +---------------------+
; + RBP +
; +---------------------+ <-- RBP, 16-byte aligned
; The follow algorithm is used for the common interrupt routine.
global ASM_PFX(CommonInterruptEntry)
ASM_PFX(CommonInterruptEntry):
cli
pop rax
;
; All interrupt handlers are invoked through interrupt gates, so
; IF flag automatically cleared at the entry point
;
xchg rcx, [rsp] ; Save rcx into stack and save vector number into rcx
and rcx, 0xFF
cmp ecx, 32 ; Intel reserved vector for exceptions?
jae NoErrorCode
bt [ASM_PFX(mErrorCodeFlag)], ecx
jc HasErrorCode
NoErrorCode:
;
; Push a dummy error code on the stack
; to maintain coherent stack map
;
push qword [rsp]
mov qword [rsp + 8], 0
HasErrorCode:
push rbp
mov rbp, rsp
push 0 ; clear EXCEPTION_HANDLER_CONTEXT.OldIdtHandler
push 0 ; clear EXCEPTION_HANDLER_CONTEXT.ExceptionDataFlag
;
; Stack:
; +---------------------+ <-- 16-byte aligned ensured by processor
; + Old SS +
; +---------------------+
; + Old RSP +
; +---------------------+
; + RFlags +
; +---------------------+
; + CS +
; +---------------------+
; + RIP +
; +---------------------+
; + Error Code +
; +---------------------+
; + RCX / Vector Number +
; +---------------------+
; + RBP +
; +---------------------+ <-- RBP, 16-byte aligned
;
;
; Since here the stack pointer is 16-byte aligned, so
; EFI_FX_SAVE_STATE_X64 of EFI_SYSTEM_CONTEXT_x64
; is 16-byte aligned
;
;; UINT64 Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64 R8, R9, R10, R11, R12, R13, R14, R15;
push r15
push r14
push r13
push r12
push r11
push r10
push r9
push r8
push rax
push qword [rbp + 8] ; RCX
push rdx
push rbx
push qword [rbp + 48] ; RSP
push qword [rbp] ; RBP
push rsi
push rdi
;; UINT64 Gs, Fs, Es, Ds, Cs, Ss; insure high 16 bits of each is zero
movzx rax, word [rbp + 56]
push rax ; for ss
movzx rax, word [rbp + 32]
push rax ; for cs
mov rax, ds
push rax
mov rax, es
push rax
mov rax, fs
push rax
mov rax, gs
push rax
mov [rbp + 8], rcx ; save vector number
;; UINT64 Rip;
push qword [rbp + 24]
;; UINT64 Gdtr[2], Idtr[2];
xor rax, rax
push rax
push rax
sidt [rsp]
mov bx, word [rsp]
mov rax, qword [rsp + 2]
mov qword [rsp], rax
mov word [rsp + 8], bx
xor rax, rax
push rax
push rax
sgdt [rsp]
mov bx, word [rsp]
mov rax, qword [rsp + 2]
mov qword [rsp], rax
mov word [rsp + 8], bx
;; UINT64 Ldtr, Tr;
xor rax, rax
str ax
push rax
sldt ax
push rax
;; UINT64 RFlags;
push qword [rbp + 40]
;; UINT64 Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
mov rax, cr8
push rax
mov rax, cr4
or rax, 0x208
mov cr4, rax
push rax
mov rax, cr3
push rax
mov rax, cr2
push rax
xor rax, rax
push rax
mov rax, cr0
push rax
;; UINT64 Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
cmp qword [rbp + 8], VC_EXCEPTION
je VcDebugRegs ; For SEV-ES (#VC) Debug registers ignored
mov rax, dr7
push rax
mov rax, dr6
push rax
mov rax, dr3
push rax
mov rax, dr2
push rax
mov rax, dr1
push rax
mov rax, dr0
push rax
jmp DrFinish
VcDebugRegs:
;; UINT64 Dr0, Dr1, Dr2, Dr3, Dr6, Dr7 are skipped for #VC to avoid exception recursion
xor rax, rax
push rax
push rax
push rax
push rax
push rax
push rax
DrFinish:
;; FX_SAVE_STATE_X64 FxSaveState;
sub rsp, 512
mov rdi, rsp
fxsave [rdi]
;; UEFI calling convention for x64 requires that Direction flag in EFLAGs is clear
cld
;; UINT32 ExceptionData;
push qword [rbp + 16]
;; Prepare parameter and call
mov rcx, [rbp + 8]
mov rdx, rsp
;
; Per X64 calling convention, allocate maximum parameter stack space
; and make sure RSP is 16-byte aligned
;
sub rsp, 4 * 8 + 8
call ASM_PFX(CommonExceptionHandler)
add rsp, 4 * 8 + 8
; The follow algorithm is used for clear shadow stack token busy bit.
; The comment is based on the sample shadow stack.
; Shadow stack is 32 bytes aligned.
; The sample shadow stack layout :
; Address | Context
; +-------------------------+
; 0xFB8 | FREE | It is 0xFC0|0x02|(LMA & CS.L), after SAVEPREVSSP.
; +-------------------------+
; 0xFC0 | Prev SSP |
; +-------------------------+
; 0xFC8 | RIP |
; +-------------------------+
; 0xFD0 | CS |
; +-------------------------+
; 0xFD8 | 0xFD8 | BUSY | BUSY flag cleared after CLRSSBSY
; +-------------------------+
; 0xFE0 | 0xFC0|0x02|(LMA & CS.L) |
; +-------------------------+
; Instructions for Intel Control Flow Enforcement Technology (CET) are supported since NASM version 2.15.01.
cmp qword [ASM_PFX(mDoFarReturnFlag)], 0
jz CetDone
mov rax, cr4
and rax, 0x800000 ; Check if CET is enabled
jz CetDone
sub rsp, 0x10
sidt [rsp]
mov rcx, qword [rsp + IA32_DESCRIPTOR.Base]; Get IDT base address
add rsp, 0x10
mov rax, qword [rbp + 8]; Get exception number
sal rax, 0x04 ; Get IDT offset
add rax, rcx ; Get IDT gate descriptor address
mov al, byte [rax + IA32_IDT_GATE_DESCRIPTOR.Reserved_0]
and rax, 0x01 ; Check IST field
jz CetDone
; SSP should be 0xFC0 at this point
mov rax, 0x04 ; advance past cs:lip:prevssp;supervisor shadow stack token
incsspq rax ; After this SSP should be 0xFE0
saveprevssp ; now the shadow stack restore token will be created at 0xFB8
rdsspq rax ; Read new SSP, SSP should be 0xFE8
sub rax, 0x10
clrssbsy [rax] ; Clear token at 0xFD8, SSP should be 0 after this
sub rax, 0x20
rstorssp [rax] ; Restore to token at 0xFB8, new SSP will be 0xFB8
mov rax, 0x01 ; Pop off the new save token created
incsspq rax ; SSP should be 0xFC0 now
CetDone:
cli
;; UINT64 ExceptionData;
add rsp, 8
;; FX_SAVE_STATE_X64 FxSaveState;
mov rsi, rsp
fxrstor [rsi]
add rsp, 512
;; UINT64 Dr0, Dr1, Dr2, Dr3, Dr6, Dr7;
;; Skip restoration of DRx registers to support in-circuit emualators
;; or debuggers set breakpoint in interrupt/exception context
add rsp, 8 * 6
;; UINT64 Cr0, Cr1, Cr2, Cr3, Cr4, Cr8;
pop rax
mov cr0, rax
add rsp, 8 ; not for Cr1
pop rax
mov cr2, rax
pop rax
mov cr3, rax
pop rax
mov cr4, rax
pop rax
mov cr8, rax
;; UINT64 RFlags;
pop qword [rbp + 40]
;; UINT64 Ldtr, Tr;
;; UINT64 Gdtr[2], Idtr[2];
;; Best not let anyone mess with these particular registers...
add rsp, 48
;; UINT64 Rip;
pop qword [rbp + 24]
;; UINT64 Gs, Fs, Es, Ds, Cs, Ss;
pop rax
; mov gs, rax ; not for gs
pop rax
; mov fs, rax ; not for fs
; (X64 will not use fs and gs, so we do not restore it)
pop rax
mov es, rax
pop rax
mov ds, rax
pop qword [rbp + 32] ; for cs
pop qword [rbp + 56] ; for ss
;; UINT64 Rdi, Rsi, Rbp, Rsp, Rbx, Rdx, Rcx, Rax;
;; UINT64 R8, R9, R10, R11, R12, R13, R14, R15;
pop rdi
pop rsi
add rsp, 8 ; not for rbp
pop qword [rbp + 48] ; for rsp
pop rbx
pop rdx
pop rcx
pop rax
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
pop r15
mov rsp, rbp
pop rbp
add rsp, 16
cmp qword [rsp - 32], 0 ; check EXCEPTION_HANDLER_CONTEXT.OldIdtHandler
jz DoReturn
cmp qword [rsp - 40], 1 ; check EXCEPTION_HANDLER_CONTEXT.ExceptionDataFlag
jz ErrorCode
jmp qword [rsp - 32]
ErrorCode:
sub rsp, 8
jmp qword [rsp - 24]
DoReturn:
cmp qword [ASM_PFX(mDoFarReturnFlag)], 0 ; Check if need to do far return instead of IRET
jz DoIret
push rax
mov rax, rsp ; save old RSP to rax
mov rsp, [rsp + 0x20]
push qword [rax + 0x10] ; save CS in new location
push qword [rax + 0x8] ; save EIP in new location
push qword [rax + 0x18] ; save EFLAGS in new location
mov rax, [rax] ; restore rax
popfq ; restore EFLAGS
retfq
DoIret:
iretq
;-------------------------------------------------------------------------------------
; GetTemplateAddressMap (&AddressMap);
;-------------------------------------------------------------------------------------
; comments here for definition of address map
global ASM_PFX(AsmGetTemplateAddressMap)
ASM_PFX(AsmGetTemplateAddressMap):
lea rax, [AsmIdtVectorBegin]
mov qword [rcx], rax
mov qword [rcx + 0x8], (AsmIdtVectorEnd - AsmIdtVectorBegin) / 256
lea rax, [HookAfterStubHeaderBegin]
mov qword [rcx + 0x10], rax
%ifdef NO_ABSOLUTE_RELOCS_IN_TEXT
; Fix up CommonInterruptEntry address
lea rax, [ASM_PFX(CommonInterruptEntry)]
lea rcx, [AsmIdtVectorBegin]
%rep 256
mov qword [rcx + (JmpAbsoluteAddress - 8 - HookAfterStubHeaderBegin)], rax
add rcx, (AsmIdtVectorEnd - AsmIdtVectorBegin) / 256
%endrep
; Fix up HookAfterStubHeaderEnd
lea rax, [HookAfterStubHeaderEnd]
lea rcx, [JmpAbsoluteAddress]
mov qword [rcx - 8], rax
%endif
ret
;-------------------------------------------------------------------------------------
; AsmVectorNumFixup (*NewVectorAddr, VectorNum, *OldVectorAddr);
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmVectorNumFixup)
ASM_PFX(AsmVectorNumFixup):
mov rax, rdx
mov [rcx + (VectorNum - 4 - HookAfterStubHeaderBegin)], al
ret
|