1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
/*++
Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
Module Name:
EfiCopyMem.c
Abstract:
This is the code that supports IA32-optimized CopyMem service
--*/
#include "Tiano.h"
VOID
EfiCommonLibCopyMem (
IN VOID *Destination,
IN VOID *Source,
IN UINTN Count
)
/*++
Routine Description:
Copy Length bytes from Source to Destination.
Arguments:
Destination - Target of copy
Source - Place to copy from
Length - Number of bytes to copy
Returns:
None
--*/
{
UINT64 MmxSave;
__asm {
mov ecx, Count
mov esi, Source
mov edi, Destination
; First off, make sure we have no overlap. That is to say,
; if (Source == Destination) => do nothing
; if (Source + Count <= Destination) => regular copy
; if (Destination + Count <= Source) => regular copy
; otherwise, do a reverse copy
mov eax, esi
add eax, ecx ; Source + Count
cmp eax, edi
jle _StartByteCopy
mov eax, edi
add eax, ecx ; Dest + Count
cmp eax, esi
jle _StartByteCopy
cmp esi, edi
je _CopyMemDone
jl _CopyOverlapped ; too bad -- overlaps
; Pick up misaligned start bytes to get destination pointer 4-byte aligned
_StartByteCopy:
cmp ecx, 0
je _CopyMemDone ; Count == 0, all done
mov edx, edi
and dl, 3 ; check lower 2 bits of address
test dl, dl
je SHORT _CopyBlocks ; already aligned?
; Copy a byte
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
inc edi
inc esi
jmp _StartByteCopy ; back to top of loop
_CopyBlocks:
; Compute how many 64-byte blocks we can clear
mov eax, ecx ; get Count in eax
shr eax, 6 ; convert to 64-byte count
shl eax, 6 ; convert back to bytes
sub ecx, eax ; subtract from the original count
shr eax, 6 ; and this is how many 64-byte blocks
; If no 64-byte blocks, then skip
cmp eax, 0
je _CopyRemainingDWords
; Save mm0
movq MmxSave, mm0
copymmx:
movq mm0, QWORD PTR ds:[esi]
movq QWORD PTR ds:[edi], mm0
movq mm0, QWORD PTR ds:[esi+8]
movq QWORD PTR ds:[edi+8], mm0
movq mm0, QWORD PTR ds:[esi+16]
movq QWORD PTR ds:[edi+16], mm0
movq mm0, QWORD PTR ds:[esi+24]
movq QWORD PTR ds:[edi+24], mm0
movq mm0, QWORD PTR ds:[esi+32]
movq QWORD PTR ds:[edi+32], mm0
movq mm0, QWORD PTR ds:[esi+40]
movq QWORD PTR ds:[edi+40], mm0
movq mm0, QWORD PTR ds:[esi+48]
movq QWORD PTR ds:[edi+48], mm0
movq mm0, QWORD PTR ds:[esi+56]
movq QWORD PTR ds:[edi+56], mm0
add edi, 64
add esi, 64
dec eax
jnz copymmx
; Restore mm0
movq mm0, MmxSave
emms ; Exit MMX Instruction
; Copy as many DWORDS as possible
_CopyRemainingDWords:
cmp ecx, 4
jb _CopyRemainingBytes
mov eax, DWORD PTR [esi] ; get data from Source
mov DWORD PTR [edi], eax ; write byte to Destination
sub ecx, 4 ; decrement Count
add esi, 4 ; advance Source pointer
add edi, 4 ; advance Destination pointer
jmp _CopyRemainingDWords ; back to top
_CopyRemainingBytes:
cmp ecx, 0
je _CopyMemDone
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
inc esi
inc edi ; advance Destination pointer
jmp SHORT _CopyRemainingBytes ; back to top of loop
;
; We do this block if the source and destination buffers overlap. To
; handle it, copy starting at the end of the source buffer and work
; your way back. Since this is the atypical case, this code has not
; been optimized, and thus simply copies bytes.
;
_CopyOverlapped:
; Move the source and destination pointers to the end of the range
add esi, ecx ; Source + Count
dec esi
add edi, ecx ; Dest + Count
dec edi
_CopyOverlappedLoop:
cmp ecx, 0
je _CopyMemDone
mov al, BYTE PTR [esi] ; get byte from Source
mov BYTE PTR [edi], al ; write byte to Destination
dec ecx
dec esi
dec edi
jmp _CopyOverlappedLoop ; back to top of loop
_CopyMemDone:
}
}
|