1212//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1313//!
1414//! Generate functions for each of the following symbols:
15+ //! __aarch64_casM_ORDER
1516//! __aarch64_swpN_ORDER
1617//! __aarch64_ldaddN_ORDER
1718//! __aarch64_ldclrN_ORDER
1819//! __aarch64_ldeorN_ORDER
1920//! __aarch64_ldsetN_ORDER
20- //! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8}, ORDER = { relax, acq, rel, acq_rel }
21- //!
22- //! TODO: M = 16
21+ //! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
2322//!
2423//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
2524//! We do something similar, but with macro arguments.
2625
27- /// We don't do runtime dispatch so we don't have to worry about the global ctor.
28- /// Apparently MacOS uses a different number of underscores in the symbol name (???)
29- // #[cfg(target_vendor = "apple")]
30- // macro_rules! have_lse {
31- // () => { ___aarch64_have_lse_atomics }
32- // }
33-
34- // #[cfg(not(target_vendor = "apple"))]
35- // macro_rules! have_lse {
36- // () => { __aarch64_have_lse_atomics }
37- // }
26+ // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
3827
3928/// Translate a byte size to a Rust type.
29+ #[ rustfmt:: skip]
4030macro_rules! int_ty {
4131 ( 1 ) => { i8 } ;
4232 ( 2 ) => { i16 } ;
@@ -48,6 +38,7 @@ macro_rules! int_ty {
4838/// Given a byte size and a register number, return a register of the appropriate size.
4939///
5040/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
41+ #[ rustfmt:: skip]
5142macro_rules! reg {
5243 ( 1 , $num: literal) => { concat!( "w" , $num) } ;
5344 ( 2 , $num: literal) => { concat!( "w" , $num) } ;
@@ -56,6 +47,7 @@ macro_rules! reg {
5647}
5748
/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
50+ #[ rustfmt:: skip]
5951macro_rules! acquire {
6052 ( Relaxed ) => { "" } ;
6153 ( Acquire ) => { "a" } ;
@@ -64,6 +56,7 @@ macro_rules! acquire {
6456}
6557
6658/// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction.
59+ #[ rustfmt:: skip]
6760macro_rules! release {
6861 ( Relaxed ) => { "" } ;
6962 ( Acquire ) => { "" } ;
@@ -72,6 +65,7 @@ macro_rules! release {
7265}
7366
7467/// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction.
68+ #[ rustfmt:: skip]
7569macro_rules! size {
7670 ( 1 ) => { "b" } ;
7771 ( 2 ) => { "h" } ;
@@ -84,6 +78,7 @@ macro_rules! size {
8478/// with the correct semantics.
8579///
8680/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
81+ #[ rustfmt:: skip]
8782macro_rules! uxt {
8883 ( 1 ) => { "uxtb" } ;
8984 ( 2 ) => { "uxth" } ;
@@ -95,15 +90,39 @@ macro_rules! uxt {
9590///
9691/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
macro_rules! ldxr {
    // Expands to a string literal holding the instruction mnemonic, assembled as
    // "ld" + acquire marker (from `acquire!`) + "xr" + access-width suffix (from `size!`).
    // For example, `ldxr!(Acquire, 2)` yields "ldaxrh".
    ($ordering:ident, $bytes:tt) => {
        concat!("ld", acquire!($ordering), "xr", size!($bytes))
    };
}
10097
/// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction
/// with the correct semantics.
///
/// Expands to a string literal holding the mnemonic: `"st"`, then the release marker produced by
/// `release!`, then `"xr"`, then the access-width suffix produced by `size!`.
/// For example, `stxr!(Relaxed, 1)` yields `"stxrb"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
macro_rules! stxr {
    ($ordering:ident, $bytes:tt) => {
        concat!("st", release!($ordering), "xr", size!($bytes))
    };
}
107+
/// Given an atomic ordering, translate it to a LoaD eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// Unlike `ldxr!`, this macro takes no byte size and appends no width suffix: its only user in
/// this file is the 128-bit compare-and-swap, which names its registers explicitly.
/// Expands to a string literal, e.g. `ldxp!(Relaxed)` yields `"ldxp"` and `ldxp!(Acquire)`
/// yields `"ldaxp"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->.
macro_rules! ldxp {
    ($ordering:ident) => {
        concat!("ld", acquire!($ordering), "xp")
    };
}
117+
/// Given an atomic ordering, translate it to a STore eXclusive Pair of registers instruction
/// with the correct semantics.
///
/// Unlike `stxr!`, this macro takes no byte size and appends no width suffix: its only user in
/// this file is the 128-bit compare-and-swap, which names its registers explicitly.
/// Expands to a string literal, e.g. `stxp!(Relaxed)` yields `"stxp"`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
macro_rules! stxp {
    ($ordering:ident) => {
        concat!("st", release!($ordering), "xp")
    };
}
108127
109128/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
@@ -134,9 +153,38 @@ macro_rules! compare_and_swap {
134153 } }
135154 }
136155 }
137- }
156+ } ;
138157}
139158
// i128 uses a completely different impl, so it has its own macro.
//
// Generates a naked `extern "C"` function `$name(expected, desired, ptr) -> i128` that performs a
// 128-bit compare-and-swap with a load-exclusive-pair / store-exclusive-pair retry loop, using
// the acquire/release flavour selected by `$ordering`.
//
// The hand-written assembly treats x0/x1 as `expected`, x2/x3 as `desired` and x4 as `ptr`
// (the AAPCS64 register assignment for this signature), and leaves the value that was loaded
// from memory in x0/x1 as the return value.
macro_rules! compare_and_swap_i128 {
    ($ordering:ident, $name:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub extern "C" fn $name (
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                unsafe { core::arch::asm! {
                    // Stash `expected` in x16/x17: the load below overwrites x0/x1.
                    "mov x16, x0",
                    "mov x17, x1",
                    // Retry point for a failed exclusive store.
                    "0:",
                    // LDXP x0, x1, [x4]
                    concat!(ldxp!($ordering), " x0, x1, [x4]"),
                    // Compare both halves; the second compare only runs if the first matched.
                    "cmp x0, x16",
                    "ccmp x1, x17, #0, eq",
                    // Mismatch: return the loaded value without storing.
                    "bne 1f",
                    // STXP w(tmp2), x2, x3, [x4]
                    concat!(stxp!($ordering), " w15, x2, x3, [x4]"),
                    // Non-zero status in w15 means the exclusive store failed; try again.
                    "cbnz w15, 0b",
                    "1:",
                    "ret",
                    // The assembly returns on its own via `ret`.
                    options(noreturn)
                } }
            }
        }
    };
}
140188
141189/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
142190macro_rules! swap {
@@ -161,7 +209,7 @@ macro_rules! swap {
161209 } }
162210 }
163211 }
164- }
212+ } ;
165213}
166214
167215/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
@@ -194,28 +242,35 @@ macro_rules! fetch_op {
194242
// We need a single macro to pass to `foreach_ldadd`.
//
// Forwards the ordering, byte size and symbol name to `fetch_op!`, fixing the
// read-modify-write instruction to "add".
macro_rules! add {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "add" }
    };
}
199249
// Single-argument-set wrapper handed to `foreach_ldclr`.
//
// Forwards to `fetch_op!` with the instruction fixed to "bic" (bit clear, i.e. AND with the
// complement of the operand) rather than a plain "and", because it backs the `ldclr` symbols.
macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "bic" }
    };
}
203255
// Single-argument-set wrapper handed to `foreach_ldeor`.
//
// Forwards to `fetch_op!` with the instruction fixed to "eor" (exclusive OR).
macro_rules! xor {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "eor" }
    };
}
207261
// Single-argument-set wrapper handed to `foreach_ldset`.
//
// Forwards to `fetch_op!` with the instruction fixed to "orr" (inclusive OR).
macro_rules! or {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "orr" }
    };
}
211267
212268// See `generate_aarch64_outlined_atomics` in build.rs.
213269include ! ( concat!( env!( "OUT_DIR" ) , "/outlined_atomics.rs" ) ) ;
214270foreach_cas ! ( compare_and_swap) ;
271+ foreach_cas16 ! ( compare_and_swap_i128) ;
215272foreach_swp ! ( swap) ;
216273foreach_ldadd ! ( add) ;
217274foreach_ldclr ! ( and) ;
218275foreach_ldeor ! ( xor) ;
219276foreach_ldset ! ( or) ;
220-
221- // TODO: CAS 16
0 commit comments