From 25402f5d0660acde3ee382a36b065945251990dc Mon Sep 17 00:00:00 2001
From: Harry Liebel
Date: Thu, 18 Jul 2013 18:07:46 +0000
Subject: ArmPkg: Added Aarch64 support

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Harry Liebel
Signed-off-by: Olivier Martin

git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@14486 6f19259b-4bc3-4df7-8a09-765794883524
---
 .../Library/CompilerIntrinsicsLib/AArch64/memcpy.S | 125 +++++++++++++++++++++
 .../CompilerIntrinsicsLib.inf                      |   5 +-
 2 files changed, 127 insertions(+), 3 deletions(-)
 create mode 100644 ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S

diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
new file mode 100644
index 0000000000..18433b3d50
--- /dev/null
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2011 - 2013, ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+.text
+.align 2
+
+
+ASM_GLOBAL ASM_PFX(memcpy)
+
+
+// Taken from Newlib BSD implementation.
+ASM_PFX(memcpy):
+  // Copy dst to x6, so we can preserve return value.
+  mov     x6, x0
+
+  // NOTE: although size_t is unsigned, this code uses signed
+  // comparisons on x2, so it relies on nb never having its top bit
+  // set. In practice this is not going to be a real problem.
+
+  // Require at least 64 bytes to be worth aligning.
+  cmp     x2, #64
+  blt     qwordcopy
+
+  // Compute offset to align destination to 16 bytes.
+  neg     x3, x0
+  and     x3, x3, 15
+
+  cbz     x3, blockcopy   // offset == 0 is likely
+
+  // We know there are at least 64 bytes to be done, so we
+  // do a 16 byte misaligned copy at first and then later do
+  // all 16-byte aligned copies. Some bytes will be copied
+  // twice, but there's no harm in that since memcpy does not
+  // guarantee correctness on overlap.
+
+  sub     x2, x2, x3      // nb -= offset
+  ldp     x4, x5, [x1]
+  add     x1, x1, x3
+  stp     x4, x5, [x6]
+  add     x6, x6, x3
+
+  // The destination pointer is now qword (16 byte) aligned.
+  // (The src pointer might be.)
+
+blockcopy:
+  // Copy 64 bytes at a time.
+  subs    x2, x2, #64
+  blt     3f
+2:  subs    x2, x2, #64
+  ldp     x4, x5, [x1,#0]
+  ldp     x8, x9, [x1,#16]
+  ldp     x10,x11,[x1,#32]
+  ldp     x12,x13,[x1,#48]
+  add     x1, x1, #64
+  stp     x4, x5, [x6,#0]
+  stp     x8, x9, [x6,#16]
+  stp     x10,x11,[x6,#32]
+  stp     x12,x13,[x6,#48]
+  add     x6, x6, #64
+  bge     2b
+
+  // Unwind pre-decrement
+3:  add     x2, x2, #64
+
+qwordcopy:
+  // Copy 0-48 bytes, 16 bytes at a time.
+  subs    x2, x2, #16
+  blt     tailcopy
+2:  ldp     x4, x5, [x1],#16
+  subs    x2, x2, #16
+  stp     x4, x5, [x6],#16
+  bge     2b
+
+  // No need to unwind the pre-decrement, it would not change
+  // the low 4 bits of the count. But how likely is it for the
+  // byte count to be a multiple of 16? Is it worth the overhead
+  // of testing for x2 == -16?
+
+tailcopy:
+  // Copy trailing 0-15 bytes.
+  tbz     x2, #3, 1f
+  ldr     x4, [x1],#8     // copy 8 bytes
+  str     x4, [x6],#8
+1:
+  tbz     x2, #2, 1f
+  ldr     w4, [x1],#4     // copy 4 bytes
+  str     w4, [x6],#4
+1:
+  tbz     x2, #1, 1f
+  ldrh    w4, [x1],#2     // copy 2 bytes
+  strh    w4, [x6],#2
+1:
+  tbz     x2, #0, return
+  ldrb    w4, [x1]        // copy 1 byte
+  strb    w4, [x6]
+
+return:
+  // This is the only return point of memcpy.
+  ret
diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
index 53b1b15c77..3e95105cd5 100644
--- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
@@ -22,9 +22,8 @@
   VERSION_STRING                 = 1.0
   LIBRARY_CLASS                  = CompilerIntrinsicsLib
 
-
-[Sources.common]
-
+[Sources.AARCH64]
+  AArch64/memcpy.S       | GCC
 
 [Sources.ARM]
   Arm/mullu.asm    | RVCT
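
As a rough illustration of the copy strategy the assembly above implements
(align the destination to 16 bytes with one possibly overlapping 16-byte copy,
stream 64-byte blocks, then 16-byte blocks, then a 0-15 byte tail), the C
sketch below mirrors the same control flow. It is illustrative only and is not
part of the patch or of EDK II; the names sketch_memcpy and copy16 are invented
here, and the small fixed-size memcpy() calls are merely a portable stand-in
for the ldp/stp and ldr/str pairs used by the assembly.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* 16-byte copy helper; the fixed-size memcpy() calls express unaligned
 * 8-byte loads and stores, playing the role of ldp/stp in the assembly. */
static void copy16(unsigned char *d, const unsigned char *s)
{
  uint64_t lo, hi;
  memcpy(&lo, s, 8);
  memcpy(&hi, s + 8, 8);
  memcpy(d, &lo, 8);
  memcpy(d + 8, &hi, 8);
}

void *sketch_memcpy(void *dst, const void *src, size_t nb)
{
  unsigned char       *d = dst;
  const unsigned char *s = src;

  if (nb >= 64) {
    /* Align the destination with one 16-byte copy; up to 15 bytes may be
     * copied twice, which is harmless because memcpy() does not have to
     * handle overlapping buffers. */
    size_t skew = (size_t)(-(uintptr_t)d & 15);
    if (skew != 0) {
      copy16(d, s);
      d += skew; s += skew; nb -= skew;
    }
    /* Main loop: 64 bytes per iteration, destination now 16-byte aligned. */
    while (nb >= 64) {
      copy16(d, s);           copy16(d + 16, s + 16);
      copy16(d + 32, s + 32); copy16(d + 48, s + 48);
      d += 64; s += 64; nb -= 64;
    }
  }

  /* Fewer than 64 bytes remain; copy 16 at a time (at most 48 bytes here). */
  while (nb >= 16) {
    copy16(d, s);
    d += 16; s += 16; nb -= 16;
  }

  /* Trailing 0-15 bytes, mirroring the tbz ladder in the assembly. */
  if (nb & 8) { memcpy(d, s, 8); d += 8; s += 8; }
  if (nb & 4) { memcpy(d, s, 4); d += 4; s += 4; }
  if (nb & 2) { memcpy(d, s, 2); d += 2; s += 2; }
  if (nb & 1) { *d = *s; }

  return dst;
}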