From 25402f5d0660acde3ee382a36b065945251990dc Mon Sep 17 00:00:00 2001
From: Harry Liebel
Date: Thu, 18 Jul 2013 18:07:46 +0000
Subject: ArmPkg: Added Aarch64 support

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Harry Liebel
Signed-off-by: Olivier Martin

git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@14486 6f19259b-4bc3-4df7-8a09-765794883524
---
 .../Library/CompilerIntrinsicsLib/AArch64/memcpy.S | 125 +++++++++++++++++++++
 .../CompilerIntrinsicsLib.inf                      |   5 +-
 2 files changed, 127 insertions(+), 3 deletions(-)
 create mode 100644 ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S

diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
new file mode 100644
index 0000000000..18433b3d50
--- /dev/null
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2011 - 2013, ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+.text
+.align 2
+
+
+ASM_GLOBAL ASM_PFX(memcpy)
+
+
+// Taken from Newlib BSD implementation.
+ASM_PFX(memcpy):
+  // Copy dst to x6, so we can preserve return value.
+  mov     x6, x0
+
+  // NOTE: although size_t is unsigned, this code uses signed
+  // comparisons on x2, so it relies on nb never having its top bit
+  // set. In practice this is not going to be a real problem.
+
+  // Require at least 64 bytes to be worth aligning.
+  cmp     x2, #64
+  blt     qwordcopy
+
+  // Compute offset to align destination to 16 bytes.
+  neg     x3, x0
+  and     x3, x3, 15
+
+  cbz     x3, blockcopy   // offset == 0 is likely
+
+  // We know there are at least 64 bytes to be done, so we
+  // do a 16 byte misaligned copy at first and then later do
+  // all 16-byte aligned copies. Some bytes will be copied
+  // twice, but there's no harm in that since memcpy does not
+  // guarantee correctness on overlap.
+
+  sub     x2, x2, x3      // nb -= offset
+  ldp     x4, x5, [x1]
+  add     x1, x1, x3
+  stp     x4, x5, [x6]
+  add     x6, x6, x3
+
+  // The destination pointer is now qword (16 byte) aligned.
+  // (The src pointer might be.)
+
+blockcopy:
+  // Copy 64 bytes at a time.
+  subs    x2, x2, #64
+  blt     3f
+2:  subs    x2, x2, #64
+  ldp     x4, x5, [x1,#0]
+  ldp     x8, x9, [x1,#16]
+  ldp     x10,x11,[x1,#32]
+  ldp     x12,x13,[x1,#48]
+  add     x1, x1, #64
+  stp     x4, x5, [x6,#0]
+  stp     x8, x9, [x6,#16]
+  stp     x10,x11,[x6,#32]
+  stp     x12,x13,[x6,#48]
+  add     x6, x6, #64
+  bge     2b
+
+  // Unwind pre-decrement
+3:  add     x2, x2, #64
+
+qwordcopy:
+  // Copy 0-48 bytes, 16 bytes at a time.
+  subs    x2, x2, #16
+  blt     tailcopy
+2:  ldp     x4, x5, [x1],#16
+  subs    x2, x2, #16
+  stp     x4, x5, [x6],#16
+  bge     2b
+
+  // No need to unwind the pre-decrement, it would not change
+  // the low 4 bits of the count. But how likely is it for the
+  // byte count to be a multiple of 16? Is it worth the overhead
+  // of testing for x2 == -16?
+
+tailcopy:
+  // Copy trailing 0-15 bytes.
+  tbz     x2, #3, 1f
+  ldr     x4, [x1],#8     // copy 8 bytes
+  str     x4, [x6],#8
+1:
+  tbz     x2, #2, 1f
+  ldr     w4, [x1],#4     // copy 4 bytes
+  str     w4, [x6],#4
+1:
+  tbz     x2, #1, 1f
+  ldrh    w4, [x1],#2     // copy 2 bytes
+  strh    w4, [x6],#2
+1:
+  tbz     x2, #0, return
+  ldrb    w4, [x1]        // copy 1 byte
+  strb    w4, [x6]
+
+return:
+  // This is the only return point of memcpy.
+  ret
diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
index 53b1b15c77..3e95105cd5 100644
--- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
@@ -22,9 +22,8 @@
   VERSION_STRING                 = 1.0
   LIBRARY_CLASS                  = CompilerIntrinsicsLib
 
-
-[Sources.common]
-
+[Sources.AARCH64]
+  AArch64/memcpy.S       | GCC
 
 [Sources.ARM]
   Arm/mullu.asm    | RVCT
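
As a rough illustration of the copy strategy the assembly above implements
(align the destination to 16 bytes with one possibly overlapping 16-byte copy,
stream 64-byte blocks, then 16-byte blocks, then a 0-15 byte tail), the C
sketch below mirrors the same control flow. It is illustrative only and is not
part of the patch or of EDK II; the names sketch_memcpy and copy16 are invented
here, and the small fixed-size memcpy() calls are merely a portable stand-in
for the ldp/stp and ldr/str pairs used by the assembly.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* 16-byte copy helper; the fixed-size memcpy() calls express unaligned
 * 8-byte loads and stores, playing the role of ldp/stp in the assembly. */
static void copy16(unsigned char *d, const unsigned char *s)
{
  uint64_t lo, hi;
  memcpy(&lo, s, 8);
  memcpy(&hi, s + 8, 8);
  memcpy(d, &lo, 8);
  memcpy(d + 8, &hi, 8);
}

void *sketch_memcpy(void *dst, const void *src, size_t nb)
{
  unsigned char       *d = dst;
  const unsigned char *s = src;

  if (nb >= 64) {
    /* Align the destination with one 16-byte copy; up to 15 bytes may be
     * copied twice, which is harmless because memcpy() does not have to
     * handle overlapping buffers. */
    size_t skew = (size_t)(-(uintptr_t)d & 15);
    if (skew != 0) {
      copy16(d, s);
      d += skew; s += skew; nb -= skew;
    }
    /* Main loop: 64 bytes per iteration, destination now 16-byte aligned. */
    while (nb >= 64) {
      copy16(d, s);           copy16(d + 16, s + 16);
      copy16(d + 32, s + 32); copy16(d + 48, s + 48);
      d += 64; s += 64; nb -= 64;
    }
  }

  /* Fewer than 64 bytes remain; copy 16 at a time (at most 48 bytes here). */
  while (nb >= 16) {
    copy16(d, s);
    d += 16; s += 16; nb -= 16;
  }

  /* Trailing 0-15 bytes, mirroring the tbz ladder in the assembly. */
  if (nb & 8) { memcpy(d, s, 8); d += 8; s += 8; }
  if (nb & 4) { memcpy(d, s, 4); d += 4; s += 4; }
  if (nb & 2) { memcpy(d, s, 2); d += 2; s += 2; }
  if (nb & 1) { *d = *s; }

  return dst;
}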