66//! which is supported on the current CPU.
77//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88//!
9- //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10- //! Use the `compiler-rt` intrinsics if you want LSE support.
11- //!
129//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310//!
1411//! Generate functions for each of the following symbols:
2421//! We do something similar, but with macro arguments.
2522#![ cfg_attr( feature = "c" , allow( unused_macros) ) ] // avoid putting the macros into a submodule
2623
27- // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+ use core:: sync:: atomic:: { AtomicU8 , Ordering } ;
25+
/// Non-zero if the host supports LSE atomics.
///
/// Starts out as zero, which makes the outlined atomics take their LL/SC fallback path. The
/// assembly emitted by `jmp_if_no_lse!` reads this flag with a plain byte load.
#[cfg(not(feature = "c"))]
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);

/// Outline atomics are only enabled with glibc support: register an `.init_array` entry that
/// checks for LSE via `getauxval` and enables it. This behavior is similar to compiler-rt.
///
/// The item is gated on `not(feature = "c")` in addition to the glibc check because
/// `HAVE_LSE_ATOMICS` only exists in that configuration. `#[used]` keeps the entry in the
/// object file even though nothing in Rust code references it.
#[cfg(all(target_env = "gnu", not(feature = "c")))]
#[used]
#[unsafe(link_section = ".init_array.90")]
pub static RUST_LSE_INIT: extern "C" fn() = {
    extern "C" fn aarch64_rust_init_lse_atomics() {
        /// `getauxval` key for the hardware capability bitmask.
        const AT_HWCAP: core::ffi::c_ulong = 16;
        /// Bit in `AT_HWCAP` that is tested to decide whether LSE may be used.
        const HWCAP_ATOMICS: core::ffi::c_ulong = 0x100;

        // The most straightforward path to querying for LSE support is the host's libc.
        // We can't use the libc crate here, we are a dependency.
        unsafe extern "C" {
            fn getauxval(num: core::ffi::c_ulong) -> core::ffi::c_ulong;
        }

        // SAFETY: `getauxval` is provided by glibc (this item is gated on
        // `target_env = "gnu"`). It takes a plain integer key and returns an integer, so the
        // call has no memory-safety preconditions.
        let hwcap = unsafe { getauxval(AT_HWCAP) };

        if hwcap & HWCAP_ATOMICS != 0 {
            // The flag is only ever raised, never cleared, so `Relaxed` is sufficient.
            HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
        }
    }
    aarch64_rust_init_lse_atomics
};
2854
2955/// Translate a byte size to a Rust type.
3056#[ rustfmt:: skip]
@@ -126,6 +152,39 @@ macro_rules! stxp {
126152 } ;
127153}
128154
// Check if the LSE instructions can be used, and jump to `$label` if not.
//
// Expands to a single assembly string that:
//   1. enables the `lse` architecture extension for the assembler,
//   2. loads the byte behind the `{have_lse}` asm operand into `w16`, and
//   3. branches to `$label` when that byte is zero.
//
// Callers must bind the `have_lse` operand in the surrounding `naked_asm!` invocation
// (e.g. `have_lse = sym ...`). `x16`/`w16` is overwritten by the check.
macro_rules! jmp_if_no_lse {
    ($label:literal) => {
        concat!(
            ".arch_extension lse; ",
            "adrp x16, {have_lse}; ",
            "ldrb w16, [x16, :lo12:{have_lse}]; ",
            "cbz w16, ",
            $label,
            ";"
        )
    };
}
168+
// Translate a memory ordering to the suffix appended to an LSE mnemonic:
// nothing for `Relaxed`, `a` for `Acquire`, `l` for `Release`, and `al` for `AcqRel`.
#[rustfmt::skip]
macro_rules! lse_mem_sfx {
    (Relaxed) => { "" };
    (Acquire) => { "a" };
    (Release) => { "l" };
    (AcqRel)  => { "al" };
}
177+
// Generate the aarch64 LSE mnemonic for an operation, memory ordering and width.
//
// * 16-byte form: `<op>p<ordering suffix>` (the pair variant, e.g. `cas` -> `casp`, `caspal`).
// * Any other width: `<op><ordering suffix><size suffix>`, where the size suffix comes from
//   `size!($bytes)`.
//
// The literal `16` arm must stay first so that it wins over the generic `$bytes:tt` arm.
macro_rules! lse {
    ($op:literal, $order:ident, 16) => {
        concat!($op, "p", lse_mem_sfx!($order))
    };
    ($op:literal, $order:ident, $bytes:tt) => {
        concat!($op, lse_mem_sfx!($order), size!($bytes))
    };
}
187+
129188/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130189macro_rules! compare_and_swap {
131190 ( $ordering: ident, $bytes: tt, $name: ident) => {
@@ -137,6 +196,11 @@ macro_rules! compare_and_swap {
137196 ) -> int_ty!( $bytes) {
138197 // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139198 core:: arch:: naked_asm! {
199+ jmp_if_no_lse!( "8f" ) ,
200+ // CAS s(0), s(1), [x2]
201+ concat!( lse!( "cas" , $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 1 ) , ", [x2]" ) ,
202+ "ret" ,
203+ "8:" ,
140204 // UXT s(tmp0), s(0)
141205 concat!( uxt!( $bytes) , " " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
142206 "0:" ,
@@ -150,6 +214,7 @@ macro_rules! compare_and_swap {
150214 "cbnz w17, 0b" ,
151215 "1:" ,
152216 "ret" ,
217+ have_lse = sym crate :: aarch64_linux:: HAVE_LSE_ATOMICS ,
153218 }
154219 }
155220 }
@@ -166,6 +231,11 @@ macro_rules! compare_and_swap_i128 {
166231 expected: i128 , desired: i128 , ptr: * mut i128
167232 ) -> i128 {
168233 core:: arch:: naked_asm! {
234+ jmp_if_no_lse!( "8f" ) ,
235+ // CASP x0, x1, x2, x3, [x4]
236+ concat!( lse!( "cas" , $ordering, 16 ) , " x0, x1, x2, x3, [x4]" ) ,
237+ "ret" ,
238+ "8:" ,
169239 "mov x16, x0" ,
170240 "mov x17, x1" ,
171241 "0:" ,
@@ -179,6 +249,7 @@ macro_rules! compare_and_swap_i128 {
179249 "cbnz w15, 0b" ,
180250 "1:" ,
181251 "ret" ,
252+ have_lse = sym crate :: aarch64_linux:: HAVE_LSE_ATOMICS ,
182253 }
183254 }
184255 }
@@ -195,6 +266,11 @@ macro_rules! swap {
195266 left: int_ty!( $bytes) , right_ptr: * mut int_ty!( $bytes)
196267 ) -> int_ty!( $bytes) {
197268 core:: arch:: naked_asm! {
269+ jmp_if_no_lse!( "8f" ) ,
270+ // SWP s(0), s(0), [x1]
271+ concat!( lse!( "swp" , $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 0 ) , ", [x1]" ) ,
272+ "ret" ,
273+ "8:" ,
198274 // mov s(tmp0), s(0)
199275 concat!( "mov " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
200276 "0:" ,
@@ -204,6 +280,7 @@ macro_rules! swap {
204280 concat!( stxr!( $ordering, $bytes) , " w17, " , reg!( $bytes, 16 ) , ", [x1]" ) ,
205281 "cbnz w17, 0b" ,
206282 "ret" ,
283+ have_lse = sym crate :: aarch64_linux:: HAVE_LSE_ATOMICS ,
207284 }
208285 }
209286 }
@@ -212,14 +289,19 @@ macro_rules! swap {
212289
213290/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214291macro_rules! fetch_op {
215- ( $ordering: ident, $bytes: tt, $name: ident, $op: literal) => {
292+ ( $ordering: ident, $bytes: tt, $name: ident, $op: literal, $lse_op : literal ) => {
216293 intrinsics! {
217294 #[ maybe_use_optimized_c_shim]
218295 #[ unsafe ( naked) ]
219296 pub unsafe extern "C" fn $name (
220297 val: int_ty!( $bytes) , ptr: * mut int_ty!( $bytes)
221298 ) -> int_ty!( $bytes) {
222299 core:: arch:: naked_asm! {
300+ jmp_if_no_lse!( "8f" ) ,
301+ // LSEOP s(0), s(0), [x1]
302+ concat!( lse!( $lse_op, $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 0 ) , ", [x1]" ) ,
303+ "ret" ,
304+ "8:" ,
223305 // mov s(tmp0), s(0)
224306 concat!( "mov " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
225307 "0:" ,
@@ -231,6 +313,7 @@ macro_rules! fetch_op {
231313 concat!( stxr!( $ordering, $bytes) , " w15, " , reg!( $bytes, 17 ) , ", [x1]" ) ,
232314 "cbnz w15, 0b" ,
233315 "ret" ,
316+ have_lse = sym crate :: aarch64_linux:: HAVE_LSE_ATOMICS ,
234317 }
235318 }
236319 }
@@ -240,25 +323,25 @@ macro_rules! fetch_op {
// We need a single macro to pass to `foreach_ldadd`.
//
// Forwards to `fetch_op!` with `add` as the LL/SC instruction and `ldadd` as the LSE one.
macro_rules! add {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
    };
}
246329
// Forwards to `fetch_op!` with `bic` as the LL/SC instruction and `ldclr` as the LSE one.
//
// NOTE(review): both mnemonics are bit-clear operations rather than a plain AND; presumably
// this macro backs the `ldclr` family of symbols — confirm against the `foreach_*` call site.
macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
    };
}
252335
// Forwards to `fetch_op!` with `eor` as the LL/SC instruction and `ldeor` as the LSE one.
macro_rules! xor {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
    };
}
258341
// Forwards to `fetch_op!` with `orr` as the LL/SC instruction and `ldset` as the LSE one.
macro_rules! or {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
    };
}
264347
0 commit comments