MdePkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

#------------------------------------------------------------------------------
#
# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
#------------------------------------------------------------------------------

        /*
         * Provide the GCC intrinsics that are required when using GCC 9 or
         * later with the -moutline-atomics option (which became the default
         * in GCC 10).
         *
         * Every helper in this file is built from load-exclusive and
         * store-exclusive (LL/SC) instructions only, so the baseline
         * architecture selected below is all that is requested from the
         * assembler.
         */
        .arch armv8-a

        /*
         * reg_alias pfx, sz
         *
         * Define the size-specific register aliases used by the helper
         * macros in this file:
         *   r0_<sz>,   r1_<sz>    name registers 0 and 1
         *   tmp0_<sz>, tmp1_<sz>  name registers 16 and 17
         * <pfx> is the register name prefix (w or x) and <sz> is the
         * operand size in bytes, which becomes part of each alias name.
         */
        .macro          reg_alias, pfx, sz
        r0_\sz          .req    \pfx\()0
        r1_\sz          .req    \pfx\()1
        tmp0_\sz        .req    \pfx\()16
        tmp1_\sz        .req    \pfx\()17
        .endm

        /*
         * Instantiate the aliases once per supported operand size. The
         * 1, 2 and 4 byte variants all use the 32-bit wN register view;
         * only the 8 byte variant needs the 64-bit xN view.
         */
        .irp            sz, 1, 2, 4
        reg_alias       w, \sz
        .endr
        reg_alias       x, 8

        /*
         * fn_start name
         *
         * Open the definition of the global function <name>: switch to a
         * dedicated .text.<name> section, export the symbol, mark it as a
         * function and place its entry label at the current location.
         * Intended to be closed by a matching fn_end.
         *
         * NOTE(review): the per-function section presumably exists so that
         * unused helpers can be discarded at link time - confirm against
         * the build configuration.
         */
        .macro          fn_start, name:req
        .section        .text.\name
        .globl          \name
        .type           \name, %function
\name\():
        .endm

        /*
         * fn_end name
         *
         * Close the definition of <name>: record the symbol size as the
         * number of bytes between the label <name> and the current
         * location.
         */
        .macro          fn_end, name:req
        .size           \name, . - \name
        .endm

        /*
         * emit_ld_sz sz, insn, opc, model, s, a, l
         *
         * Generate the helper __aarch64_<insn><sz><model>: the atomic
         * read-modify-write operation named after the LSE instruction
         * <insn>, for operands of <sz> bytes. No LSE instruction is
         * emitted; the helper is a load-exclusive/store-exclusive (LL/SC)
         * retry loop that combines the loaded value with the operand using
         * <opc>. For swp there is nothing to combine, so the operand itself
         * is stored.
         *
         *   s   size suffix of the exclusive load/store (b, h or empty)
         *   a   spliced into the load mnemonic  (ld<a>xr<s>)
         *   l   spliced into the store mnemonic (st<l>xr<s>)
         *
         * Register usage: register 0 holds the operand on entry and the
         * value loaded from memory on return, x1 holds the address,
         * registers 16/17 and w15 are scratch.
         */
        .macro          emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
        fn_start        __aarch64_\insn\()\sz\()\model
        mov             tmp0_\sz, r0_\sz
1:      ld\a\()xr\s     r0_\sz, [x1]
        .ifc            \insn, swp
        /* Plain exchange: write the caller's operand back unchanged. */
        st\l\()xr\s     w15, tmp0_\sz, [x1]
        .else
        /* Combine the loaded value with the operand, then store the result. */
        \opc            tmp1_\sz, r0_\sz, tmp0_\sz
        st\l\()xr\s     w15, tmp1_\sz, [x1]
        .endif
        /* A non-zero status means the exclusive store failed: retry. */
        cbnz            w15, 1b
        ret
        fn_end          __aarch64_\insn\()\sz\()\model
        .endm

        /*
         * Emit atomic helpers for \model for operand sizes in the
         * set {1, 2, 4, 8}, for the instruction pattern given by
         * \insn. (This is the LSE name, but this implementation uses
         * the generic LL/SC sequence using \opc as the arithmetic
         * operation on the target.)
         *
         * The fifth argument passed to emit_ld_sz is the size suffix of
         * the exclusive load/store: b for 1 byte, h for 2 bytes, and left
         * empty for 4 and 8 bytes. \a and \l are forwarded unchanged.
         */
        .macro          emit_ld, insn:req, opc:req, model:req, a, l
        emit_ld_sz      1, \insn, \opc, \model, b, \a, \l
        emit_ld_sz      2, \insn, \opc, \model, h, \a, \l
        emit_ld_sz      4, \insn, \opc, \model,  , \a, \l
        emit_ld_sz      8, \insn, \opc, \model,  , \a, \l
        .endm

        /*
         * emit_cas_sz sz, model, uxt, s, a, l
         *
         * Generate the helper __aarch64_cas<sz><model>: a compare-and-swap
         * on an operand of <sz> bytes, implemented as a load-exclusive /
         * store-exclusive (LL/SC) retry loop.
         *
         *   uxt  instruction used to copy the expected value into the
         *        scratch register (uxtb, uxth or mov, chosen by the caller)
         *   s    size suffix of the exclusive load/store (b, h or empty)
         *   a    spliced into the load mnemonic  (ld<a>xr<s>)
         *   l    spliced into the store mnemonic (st<l>xr<s>)
         *
         * Register usage: register 0 holds the expected value on entry and
         * the value loaded from memory on return, register 1 holds the
         * value to store, x2 holds the address, register 16 and w15 are
         * scratch.
         */
        .macro          emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
        fn_start        __aarch64_cas\sz\()\model
        \uxt            tmp0_\sz, r0_\sz
1:      ld\a\()xr\s     r0_\sz, [x2]
        cmp             r0_\sz, tmp0_\sz
        /* Mismatch: leave memory untouched and return what was loaded. */
        b.ne            2f
        st\l\()xr\s     w15, r1_\sz, [x2]
        /* A non-zero status means the exclusive store failed: retry. */
        cbnz            w15, 1b
2:      ret
        fn_end          __aarch64_cas\sz\()\model
        .endm

        /*
         * Emit compare-and-swap helpers for \model for operand sizes in the
         * set {1, 2, 4, 8, 16}.
         *
         * \a and \l are spliced into the exclusive load and store mnemonics
         * respectively and are forwarded unchanged to emit_cas_sz. For the
         * sizes 1 and 2 the expected value is zero-extended (uxtb/uxth)
         * before it is compared; sizes 4 and 8 use a plain mov.
         */
        .macro          emit_cas, model:req, a, l
        emit_cas_sz     1, \model, uxtb, b, \a, \l
        emit_cas_sz     2, \model, uxth, h, \a, \l
        emit_cas_sz     4, \model, mov ,  , \a, \l
        emit_cas_sz     8, \model, mov ,  , \a, \l

        /*
         * We cannot use the parameterized sequence for 16 byte CAS, so we
         * need to define it explicitly.
         *
         * Register usage (as in libgcc's outline atomics helpers):
         *   x0:x1  expected value on entry, value loaded from memory on
         *          return
         *   x2:x3  desired value, stored only if memory matched x0:x1
         *   x4     address of the 16 byte object
         *   x16, x17, w15  scratch
         */
        fn_start        __aarch64_cas16\model
        /* Keep the expected value; x0:x1 is overwritten by the load. */
        mov             x16, x0
        mov             x17, x1
0:      ld\a\()xp       x0, x1, [x4]
        cmp             x0, x16
        ccmp            x1, x17, #0, eq
        bne             1f
        /*
         * Store the desired value (x2:x3). Storing x16:x17 here would only
         * write the expected value back, so the swap would never happen.
         */
        st\l\()xp       w15, x2, x3, [x4]
        /* A non-zero status means the exclusive store failed: retry. */
        cbnz            w15, 0b
1:      ret
        fn_end          __aarch64_cas16\model
        .endm

        /*
         * Emit the set of GCC outline atomic helper functions for
         * the memory ordering model given by \model:
         * - relax      unordered loads and stores
         * - acq        load-acquire, unordered store
         * - rel        unordered load, store-release
         * - acq_rel    load-acquire, store-release
         *
         * \model is appended verbatim to every helper name. \a and \l are
         * forwarded to the emit_ld and emit_cas macros, which splice them
         * into the exclusive load (ld<a>xr) and store (st<l>xr) mnemonics;
         * leaving one empty selects the unordered form of that access.
         *
         * Each emit_ld line pairs the LSE operation name used in the helper
         * name with the instruction that computes the value to store. For
         * swp the second argument is only a placeholder: emit_ld_sz stores
         * the operand directly and never expands it.
         */
        .macro          emit_model, model:req, a, l
        emit_ld         ldadd, add, \model, \a, \l
        emit_ld         ldclr, bic, \model, \a, \l
        emit_ld         ldeor, eor, \model, \a, \l
        emit_ld         ldset, orr, \model, \a, \l
        emit_ld         swp,   mov, \model, \a, \l
        emit_cas        \model, \a, \l
        .endm

        /*
         * Instantiate the helpers for each memory ordering model. The
         * second argument (a) selects the acquire form of the exclusive
         * load, the third (l) the release form of the exclusive store;
         * an omitted argument leaves that access unordered.
         */
        emit_model      _relax
        emit_model      _acq, a
        emit_model      _rel,, l
        emit_model      _acq_rel, a, l