66//! which is supported on the current CPU.
77//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
88//!
9- //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
10- //! Use the `compiler-rt` intrinsics if you want LSE support.
11- //!
129//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
1310//!
1411//! Generate functions for each of the following symbols:
2421//! We do something similar, but with macro arguments.
2522#![ cfg_attr( feature = "c" , allow( unused_macros) ) ] // avoid putting the macros into a submodule
2623
27- // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
24+ use core:: sync:: atomic:: { AtomicU8 , Ordering } ;
25+
/// Non-zero if the host supports LSE atomics.
///
/// Starts out as `0` and is set to `1` by `__rust_enable_lse`. The outline
/// atomic routines read it with a single-byte load (`ldrb`) through the
/// `have_lse` symbol operand, so it is kept as a one-byte atomic.
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
28+
29+ intrinsics ! {
30+ /// Call to enable LSE in outline atomic operations. The caller must verify
31+ /// LSE operations are supported.
32+ pub extern "C" fn __rust_enable_lse( ) {
33+ HAVE_LSE_ATOMICS . store( 1 , Ordering :: Relaxed ) ;
34+ }
35+ }
2836
2937/// Translate a byte size to a Rust type.
3038#[ rustfmt:: skip]
@@ -126,6 +134,39 @@ macro_rules! stxp {
126134 } ;
127135}
128136
// Check if LSE intrinsic can be used, and jump to label if not.
//
// Expands to an assembly snippet that enables the `lse` architecture extension
// for the assembler, loads the byte behind the `{have_lse}` symbol operand into
// `w16` (clobbering `x16`), and branches to `$label` when that byte is zero.
// Every `naked_asm!` block that uses this macro must therefore supply a
// `have_lse = sym ...` operand.
macro_rules! jmp_if_no_lse {
    ($label:literal) => {
        concat!(
            ".arch_extension lse; ",
            "adrp x16, {have_lse}; ",
            "ldrb w16, [x16, :lo12:{have_lse}]; ",
            "cbz w16, ",
            $label,
            ";"
        )
    };
}
150+
// Translate memory ordering to the LSE suffix.
//
// `Relaxed` has no suffix, `Acquire` adds `a`, `Release` adds `l`, and
// `AcqRel` adds both (`al`). Any other ordering identifier fails to match.
#[rustfmt::skip]
macro_rules! lse_mem_sfx {
    (Relaxed) => { "" };
    (Acquire) => { "a" };
    (Release) => { "l" };
    (AcqRel)  => { "al" };
}
159+
// Generate the aarch64 LSE operation for memory ordering and width.
//
// * 16-byte form: `<op>` + `p` + ordering suffix, e.g. `lse!("cas", AcqRel, 16)`
//   yields `caspal`.
// * Every other width: `<op>` + ordering suffix + `size!($bytes)`.
//   NOTE(review): `size!` is defined elsewhere in this file; presumably it
//   yields the width suffix of the mnemonic - confirm against its definition.
//
// The literal `16` arm must stay first so it wins over the generic `tt` arm.
macro_rules! lse {
    ($op:literal, $order:ident, 16) => {
        concat!($op, "p", lse_mem_sfx!($order))
    };
    ($op:literal, $order:ident, $bytes:tt) => {
        concat!($op, lse_mem_sfx!($order), size!($bytes))
    };
}
169+
129170/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
130171macro_rules! compare_and_swap {
131172 ( $ordering: ident, $bytes: tt, $name: ident) => {
@@ -137,6 +178,11 @@ macro_rules! compare_and_swap {
137178 ) -> int_ty!( $bytes) {
138179 // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
139180 core:: arch:: naked_asm! {
181+ jmp_if_no_lse!( "8f" ) ,
182+ // CAS s(0), s(1), [x2]
183+ concat!( lse!( "cas" , $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 1 ) , ", [x2]" ) ,
184+ "ret" ,
185+ "8:" ,
140186 // UXT s(tmp0), s(0)
141187 concat!( uxt!( $bytes) , " " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
142188 "0:" ,
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
150196 "cbnz w17, 0b" ,
151197 "1:" ,
152198 "ret" ,
199+ have_lse = sym crate :: aarch64_outline_atomics:: HAVE_LSE_ATOMICS ,
153200 }
154201 }
155202 }
@@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 {
166213 expected: i128 , desired: i128 , ptr: * mut i128
167214 ) -> i128 {
168215 core:: arch:: naked_asm! {
216+ jmp_if_no_lse!( "8f" ) ,
217+ // CASP x0, x1, x2, x3, [x4]
218+ concat!( lse!( "cas" , $ordering, 16 ) , " x0, x1, x2, x3, [x4]" ) ,
219+ "ret" ,
220+ "8:" ,
169221 "mov x16, x0" ,
170222 "mov x17, x1" ,
171223 "0:" ,
@@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 {
179231 "cbnz w15, 0b" ,
180232 "1:" ,
181233 "ret" ,
234+ have_lse = sym crate :: aarch64_outline_atomics:: HAVE_LSE_ATOMICS ,
182235 }
183236 }
184237 }
@@ -195,6 +248,11 @@ macro_rules! swap {
195248 left: int_ty!( $bytes) , right_ptr: * mut int_ty!( $bytes)
196249 ) -> int_ty!( $bytes) {
197250 core:: arch:: naked_asm! {
251+ jmp_if_no_lse!( "8f" ) ,
252+ // SWP s(0), s(0), [x1]
253+ concat!( lse!( "swp" , $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 0 ) , ", [x1]" ) ,
254+ "ret" ,
255+ "8:" ,
198256 // mov s(tmp0), s(0)
199257 concat!( "mov " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
200258 "0:" ,
@@ -204,6 +262,7 @@ macro_rules! swap {
204262 concat!( stxr!( $ordering, $bytes) , " w17, " , reg!( $bytes, 16 ) , ", [x1]" ) ,
205263 "cbnz w17, 0b" ,
206264 "ret" ,
265+ have_lse = sym crate :: aarch64_outline_atomics:: HAVE_LSE_ATOMICS ,
207266 }
208267 }
209268 }
@@ -212,14 +271,19 @@ macro_rules! swap {
212271
213272/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
214273macro_rules! fetch_op {
215- ( $ordering: ident, $bytes: tt, $name: ident, $op: literal) => {
274+ ( $ordering: ident, $bytes: tt, $name: ident, $op: literal, $lse_op : literal ) => {
216275 intrinsics! {
217276 #[ maybe_use_optimized_c_shim]
218277 #[ unsafe ( naked) ]
219278 pub unsafe extern "C" fn $name (
220279 val: int_ty!( $bytes) , ptr: * mut int_ty!( $bytes)
221280 ) -> int_ty!( $bytes) {
222281 core:: arch:: naked_asm! {
282+ jmp_if_no_lse!( "8f" ) ,
283+ // LSEOP s(0), s(0), [x1]
284+ concat!( lse!( $lse_op, $ordering, $bytes) , " " , reg!( $bytes, 0 ) , ", " , reg!( $bytes, 0 ) , ", [x1]" ) ,
285+ "ret" ,
286+ "8:" ,
223287 // mov s(tmp0), s(0)
224288 concat!( "mov " , reg!( $bytes, 16 ) , ", " , reg!( $bytes, 0 ) ) ,
225289 "0:" ,
@@ -231,6 +295,7 @@ macro_rules! fetch_op {
231295 concat!( stxr!( $ordering, $bytes) , " w15, " , reg!( $bytes, 17 ) , ", [x1]" ) ,
232296 "cbnz w15, 0b" ,
233297 "ret" ,
298+ have_lse = sym crate :: aarch64_outline_atomics:: HAVE_LSE_ATOMICS ,
234299 }
235300 }
236301 }
@@ -240,25 +305,25 @@ macro_rules! fetch_op {
// We need a single macro to pass to `foreach_ldadd`.
//
// Forwards to `fetch_op!` with `add` as the LL/SC fallback instruction and
// `ldadd` as the LSE instruction.
macro_rules! add {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
    };
}
246311
// Forwards to `fetch_op!` with `bic` as the LL/SC fallback instruction and
// `ldclr` as the LSE instruction. Despite the macro's name, both mnemonics are
// bit-clear forms: the bits set in the operand are cleared in memory.
macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
    };
}
252317
// Forwards to `fetch_op!` with `eor` as the LL/SC fallback instruction and
// `ldeor` as the LSE instruction.
macro_rules! xor {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
    };
}
258323
// Forwards to `fetch_op!` with `orr` as the LL/SC fallback instruction and
// `ldset` as the LSE instruction.
macro_rules! or {
    ($ordering:ident, $bytes:tt, $name:ident) => {
        fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
    };
}
264329
0 commit comments