diff options
Diffstat (limited to 'MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S')
-rw-r--r-- | MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S new file mode 100644 index 0000000000..951d15777a --- /dev/null +++ b/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.S @@ -0,0 +1,138 @@ +//
+// Copyright (c) 2013 - 2016, Linaro Limited
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the Linaro nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+// Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+// Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+ .text
+ .thumb
+ .syntax unified
+ .align 5
+ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
+ASM_PFX(InternalMemCompareMem):
+ push {r4-r8, lr}
+ eor tmp1, src1, src2
+ tst tmp1, #3
+ bne .Lmisaligned4
+ ands tmp1, src1, #3
+ bne .Lmutual_align
+ add limit_wd, limit, #3
+ nop.w
+ lsr limit_wd, limit_wd, #2
+
+ // Start of performance-critical section -- one 32B cache line.
+.Lloop_aligned:
+ ldr data1, [src1], #4
+ ldr data2, [src2], #4
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 // Non-zero if differences found.
+ cbnz diff, 0f
+ bne .Lloop_aligned
+ // End of performance-critical section -- one 32B cache line.
+
+ // Not reached the limit, must have found a diff.
+0: cbnz limit_wd, .Lnot_limit
+
+ // Limit % 4 == 0 => all bytes significant.
+ ands limit, limit, #3
+ beq .Lnot_limit
+
+ lsl limit, limit, #3 // Bits -> bytes.
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+
+.Lnot_limit:
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+
+ // The MS-non-zero bit of DIFF marks either the first bit
+ // that is different, or the end of the significant data.
+ // Shifting left now will bring the critical information into the
+ // top bits.
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+
+ // But we need to zero-extend (char is unsigned) the value and then
+ // perform a signed 32-bit subtraction.
+ lsr data1, data1, #28
+ sub result, data1, data2, lsr #28
+ pop {r4-r8, pc}
+
+.Lmutual_align:
+ // Sources are mutually aligned, but are not currently at an
+ // alignment boundary. Round down the addresses and then mask off
+ // the bytes that precede the start point.
+ bic src1, src1, #3
+ bic src2, src2, #3
+ add limit, limit, tmp1 // Adjust the limit for the extra.
+ lsl tmp1, tmp1, #2 // Bytes beyond alignment -> bits.
+ ldr data1, [src1], #4
+ neg tmp1, tmp1 // Bits to alignment -32.
+ ldr data2, [src2], #4
+ mov tmp2, #~0
+
+ // Little-endian. Early bytes are at LSB.
+ lsr tmp2, tmp2, tmp1 // Shift (tmp1 & 31).
+ add limit_wd, limit, #3
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #2
+ b .Lstart_realigned
+
+.Lmisaligned4:
+ sub limit, limit, #1
+1:
+ // Perhaps we can do better than this.
+ ldrb data1, [src1], #1
+ ldrb data2, [src2], #1
+ subs limit, limit, #1
+ it cs
+ cmpcs data1, data2
+ beq 1b
+ sub result, data1, data2
+ pop {r4-r8, pc}
|