From 5587362acdeb425738d4ccc131a76f69cf8112af Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 25 Jul 2025 22:31:44 +0200 Subject: [PATCH 1/9] Use System allocator for thread-local storage --- .../src/sys/thread_local/destructors/list.rs | 15 ++--- library/std/src/sys/thread_local/key/xous.rs | 6 +- library/std/src/sys/thread_local/os.rs | 26 ++++---- library/std/src/thread/mod.rs | 14 ++-- .../threads-sendsync/tls-in-global-alloc.rs | 64 +++++++++++++++++++ 5 files changed, 98 insertions(+), 27 deletions(-) create mode 100644 tests/ui/threads-sendsync/tls-in-global-alloc.rs diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs index b9d5214c438d2..606694cc78d79 100644 --- a/library/std/src/sys/thread_local/destructors/list.rs +++ b/library/std/src/sys/thread_local/destructors/list.rs @@ -1,19 +1,14 @@ +use crate::alloc::System; use crate::cell::RefCell; use crate::sys::thread_local::guard; #[thread_local] -static DTORS: RefCell> = RefCell::new(Vec::new()); +static DTORS: RefCell> = + RefCell::new(Vec::new_in(System)); pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - let Ok(mut dtors) = DTORS.try_borrow_mut() else { - // This point can only be reached if the global allocator calls this - // function again. - // FIXME: maybe use the system allocator instead? - rtabort!("the global allocator may not use TLS with destructors"); - }; - + let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") }; guard::enable(); - dtors.push((t, dtor)); } @@ -36,7 +31,7 @@ pub unsafe fn run() { } None => { // Free the list memory. 
- *dtors = Vec::new(); + *dtors = Vec::new_in(System); break; } } diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index a27cec5ca1a60..529178f34757b 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -38,6 +38,7 @@ use core::arch::asm; +use crate::alloc::System; use crate::mem::ManuallyDrop; use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory}; use crate::ptr; @@ -151,7 +152,10 @@ struct Node { } unsafe fn register_dtor(key: Key, dtor: Dtor) { - let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() })); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let mut node = + ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System)); #[allow(unused_unsafe)] let mut head = unsafe { DTORS.load(Acquire) }; diff --git a/library/std/src/sys/thread_local/os.rs b/library/std/src/sys/thread_local/os.rs index 88bb5ae7c650d..d17c3c31beebd 100644 --- a/library/std/src/sys/thread_local/os.rs +++ b/library/std/src/sys/thread_local/os.rs @@ -1,6 +1,6 @@ use super::key::{Key, LazyKey, get, set}; use super::{abort_on_dtor_unwind, guard}; -use crate::alloc::{self, Layout}; +use crate::alloc::{self, GlobalAlloc, Layout, System}; use crate::cell::Cell; use crate::marker::PhantomData; use crate::mem::ManuallyDrop; @@ -113,17 +113,19 @@ pub const fn value_align() -> usize { crate::mem::align_of::>() } -/// Equivalent to `Box>`, but potentially over-aligned. -struct AlignedBox { +/// Equivalent to `Box, System>`, but potentially over-aligned. 
+struct AlignedSystemBox { ptr: NonNull>, } -impl AlignedBox { +impl AlignedSystemBox { #[inline] fn new(v: Value) -> Self { let layout = Layout::new::>().align_to(ALIGN).unwrap(); - let ptr: *mut Value = (unsafe { alloc::alloc(layout) }).cast(); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let ptr: *mut Value = (unsafe { System.alloc(layout) }).cast(); let Some(ptr) = NonNull::new(ptr) else { alloc::handle_alloc_error(layout); }; @@ -143,7 +145,7 @@ impl AlignedBox { } } -impl Deref for AlignedBox { +impl Deref for AlignedSystemBox { type Target = Value; #[inline] @@ -152,14 +154,14 @@ impl Deref for AlignedBox { } } -impl Drop for AlignedBox { +impl Drop for AlignedSystemBox { #[inline] fn drop(&mut self) { let layout = Layout::new::>().align_to(ALIGN).unwrap(); unsafe { let unwind_result = catch_unwind(AssertUnwindSafe(|| self.ptr.drop_in_place())); - alloc::dealloc(self.ptr.as_ptr().cast(), layout); + System.dealloc(self.ptr.as_ptr().cast(), layout); if let Err(payload) = unwind_result { resume_unwind(payload); } @@ -205,11 +207,11 @@ impl Storage { return ptr::null(); } - let value = AlignedBox::::new(Value { + let value = AlignedSystemBox::::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key, }); - let ptr = AlignedBox::into_raw(value); + let ptr = AlignedSystemBox::into_raw(value); // SAFETY: // * key came from a `LazyKey` and is thus correct. @@ -227,7 +229,7 @@ impl Storage { // initializer has already returned and the next scope only starts // after we return the pointer. Therefore, there can be no references // to the old value. - drop(unsafe { AlignedBox::::from_raw(old) }); + drop(unsafe { AlignedSystemBox::::from_raw(old) }); } // SAFETY: We just created this value above. 
@@ -246,7 +248,7 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) // Note that to prevent an infinite loop we reset it back to null right // before we return from the destructor ourselves. abort_on_dtor_unwind(|| { - let ptr = unsafe { AlignedBox::::from_raw(ptr as *mut Value) }; + let ptr = unsafe { AlignedSystemBox::::from_raw(ptr as *mut Value) }; let key = ptr.key; // SAFETY: `key` is the TLS key `ptr` was stored under. unsafe { set(key, ptr::without_provenance_mut(1)) }; diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index fd7cce3f97db8..d20592275a5d4 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -158,6 +158,7 @@ #[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))] mod tests; +use crate::alloc::System; use crate::any::Any; use crate::cell::UnsafeCell; use crate::ffi::CStr; @@ -1466,7 +1467,10 @@ impl Inner { /// /// [`thread::current`]: current::current pub struct Thread { - inner: Pin>, + // We use the System allocator such that creating or dropping this handle + // does not interfere with a potential Global allocator using thread-local + // storage. + inner: Pin>, } impl Thread { @@ -1479,7 +1483,7 @@ impl Thread { // SAFETY: We pin the Arc immediately after creation, so its address never // changes. let inner = unsafe { - let mut arc = Arc::::new_uninit(); + let mut arc = Arc::::new_uninit_in(System); let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr(); (&raw mut (*ptr).name).write(name); (&raw mut (*ptr).id).write(id); @@ -1650,7 +1654,7 @@ impl Thread { pub fn into_raw(self) -> *const () { // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. let inner = unsafe { Pin::into_inner_unchecked(self.inner) }; - Arc::into_raw(inner) as *const () + Arc::into_raw_with_allocator(inner).0 as *const () } /// Constructs a `Thread` from a raw pointer. 
@@ -1672,7 +1676,9 @@ impl Thread { #[unstable(feature = "thread_raw", issue = "97523")] pub unsafe fn from_raw(ptr: *const ()) -> Thread { // Safety: Upheld by caller. - unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } } + unsafe { + Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) } + } } fn cname(&self) -> Option<&CStr> { diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs new file mode 100644 index 0000000000000..82a96ce9a6c3a --- /dev/null +++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs @@ -0,0 +1,64 @@ +//@ run-pass +//@ needs-threads + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::thread::Thread; +use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering}; + +static GLOBAL: AtomicUsize = AtomicUsize::new(0); + +struct Local(Thread); + +thread_local! { + static LOCAL: Local = { + GLOBAL.fetch_or(1, Ordering::Relaxed); + Local(std::thread::current()) + }; +} + +impl Drop for Local { + fn drop(&mut self) { + GLOBAL.fetch_or(2, Ordering::Relaxed); + } +} + +static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); + +#[global_allocator] +static ALLOC: Alloc = Alloc; +struct Alloc; + +unsafe impl GlobalAlloc for Alloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // Make sure we aren't re-entrant. + assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + let ret = unsafe { System.alloc(layout) }; + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + ret + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + // Make sure we aren't re-entrant. 
+ assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); + LOCAL.with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + unsafe { System.dealloc(ptr, layout) } + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + } +} + +fn main() { + std::thread::spawn(|| { + std::hint::black_box(vec![1, 2]); + assert!(GLOBAL.load(Ordering::Relaxed) == 1); + }) + .join() + .unwrap(); + assert!(GLOBAL.load(Ordering::Relaxed) == 3); +} From d17b0c3c65a60494416ae469e05998beedc9e812 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Sat, 26 Jul 2025 14:30:00 +0200 Subject: [PATCH 2/9] Use spinlock for ThreadId if 64-bit atomic unavailable --- library/std/src/thread/mod.rs | 48 ++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index d20592275a5d4..adac9823ccd1c 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1259,21 +1259,45 @@ impl ThreadId { } } _ => { - use crate::sync::{Mutex, PoisonError}; - - static COUNTER: Mutex = Mutex::new(0); + use crate::cell::SyncUnsafeCell; + use crate::hint::spin_loop; + use crate::sync::atomic::{Atomic, AtomicBool}; + use crate::thread::yield_now; + + // If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex + // here as we might be trying to get the current thread id in the global allocator, + // and on some platforms Mutex requires allocation. + static COUNTER_LOCKED: Atomic = AtomicBool::new(false); + static COUNTER: SyncUnsafeCell = SyncUnsafeCell::new(0); + + // Acquire lock. 
+ let mut spin = 0; + while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() { + if spin <= 3 { + for _ in 0..(1 << spin) { + spin_loop(); + } + } else { + yield_now(); + } + spin += 1; + } - let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner); - let Some(id) = counter.checked_add(1) else { - // in case the panic handler ends up calling `ThreadId::new()`, - // avoid reentrant lock acquire. - drop(counter); - exhausted(); + let id; + // SAFETY: we have an exclusive lock on the counter. + unsafe { + id = (*COUNTER.get()).saturating_add(1); + (*COUNTER.get()) = id; }; - *counter = id; - drop(counter); - ThreadId(NonZero::new(id).unwrap()) + // Release the lock. + COUNTER_LOCKED.store(false, Ordering::Release); + + if id == u64::MAX { + exhausted() + } else { + ThreadId(NonZero::new(id).unwrap()) + } } } } From 1a9fc365b34f9aefd580dda256e781e6c0755a6d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Tue, 29 Jul 2025 19:43:29 +0200 Subject: [PATCH 3/9] Add documentation guaranteeing global allocator use of TLS Remove outdated part of comment claiming thread_local re-enters global allocator Fix typo in doc comment Add comments for guarantees given and footnote that System may still be called Revise mention of using the global allocator Allow for the possibility that the global allocator is the system allocator. 
Co-authored-by: Mark Rousskov --- library/core/src/alloc/global.rs | 25 +++++++++++++++++++++++++ library/std/src/alloc.rs | 9 ++++++++- library/std/src/thread/current.rs | 19 ++++++++++--------- library/std/src/thread/local.rs | 7 ++++++- 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/library/core/src/alloc/global.rs b/library/core/src/alloc/global.rs index 5bf6f143b4f82..a572ba12baa4b 100644 --- a/library/core/src/alloc/global.rs +++ b/library/core/src/alloc/global.rs @@ -115,6 +115,31 @@ use crate::{cmp, ptr}; /// Whether allocations happen or not is not part of the program behavior, even if it /// could be detected via an allocator that tracks allocations by printing or otherwise /// having side effects. +/// +/// # Re-entrance +/// +/// When implementing a global allocator one has to be careful not to create an infinitely recursive +/// implementation by accident, as many constructs in the Rust standard library may allocate in +/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using +/// it is highly problematic in a global allocator. +/// +/// Generally speaking for this reason one should stick to library features available through +/// [`core`], and avoid using [`std`] in a global allocator. A few features from [`std`] are +/// guaranteed to not use `#[global_allocator]` to allocate: +/// +/// - [`std::thread_local`], +/// - [`std::thread::current`], +/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and +/// [`Clone`] implementation. 
+/// +/// [`std`]: ../../std/index.html +/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html +/// [`std::thread_local`]: ../../std/macro.thread_local.html +/// [`std::thread::current`]: ../../std/thread/fn.current.html +/// [`std::thread::park`]: ../../std/thread/fn.park.html +/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html +/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark + #[stable(feature = "global_alloc", since = "1.28.0")] pub unsafe trait GlobalAlloc { /// Allocates memory as described by the given `layout`. diff --git a/library/std/src/alloc.rs b/library/std/src/alloc.rs index 1d61630269ac3..f903a61c2a5d0 100644 --- a/library/std/src/alloc.rs +++ b/library/std/src/alloc.rs @@ -11,7 +11,7 @@ //! //! This attribute allows configuring the choice of global allocator. //! You can use this to implement a completely custom global allocator -//! to route all default allocation requests to a custom object. +//! to route all[^system-alloc] default allocation requests to a custom object. //! //! ```rust //! use std::alloc::{GlobalAlloc, System, Layout}; @@ -52,6 +52,13 @@ //! //! The `#[global_allocator]` can only be used once in a crate //! or its recursive dependencies. +//! +//! [^system-alloc]: Note that the Rust standard library internals may still +//! directly call [`System`] when necessary (for example for the runtime +//! support typically required to implement a global allocator, see [re-entrance] on [`GlobalAlloc`] +//! for more details). +//! +//! [re-entrance]: trait.GlobalAlloc.html#re-entrance #![deny(unsafe_op_in_unsafe_fn)] #![stable(feature = "alloc_module", since = "1.28.0")] diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index f00212bfcb617..c4619a80021a6 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -270,15 +270,16 @@ fn init_current(current: *mut ()) -> Thread { // extra TLS write above shouldn't matter. 
The alternative is nearly always // a stack overflow. - // If you came across this message, contact the author of your allocator. - // If you are said author: A surprising amount of functions inside the - // standard library (e.g. `Mutex`, `thread_local!`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, so - // you'll get circular dependencies all over the place when using them. - // I (joboet) highly recommend using only APIs from core in your allocator - // and implementing your own system abstractions. Still, if you feel that - // a particular API should be entirely allocation-free, feel free to open - // an issue on the Rust repository, we'll see what we can do. + // If you came across this message, contact the author of your + // allocator. If you are said author: A surprising amount of functions + // inside the standard library (e.g. `Mutex`, `File` when using long + // paths, even `panic!` when using unwinding), need memory allocation, + // so you'll get circular dependencies all over the place when using + // them. I (joboet) highly recommend using only APIs from core in your + // allocator and implementing your own system abstractions. Still, if + // you feel that a particular API should be entirely allocation-free, + // feel free to open an issue on the Rust repository, we'll see what we + // can do. rtabort!( "\n\ Attempted to access thread-local data while allocating said data.\n\ diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index 4259a4d1f3b7c..06e4b252fc8f3 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -24,12 +24,17 @@ use crate::fmt; /// [`with`]) within a thread, and values that implement [`Drop`] get /// destructed when a thread exits. Some platform-specific caveats apply, which /// are explained below. -/// Note that if the destructor panics, the whole process will be [aborted]. 
+/// Note that, should the destructor panic, the whole process will be [aborted]. +/// On platforms where initialization requires memory allocation, this is +/// performed directly through [`System`], allowing the [global allocator] +/// to make use of thread local storage. /// /// A `LocalKey`'s initializer cannot recursively depend on itself. Using a /// `LocalKey` in this way may cause panics, aborts, or infinite recursion on /// the first call to `with`. /// +/// [`System`]: crate::alloc::System +/// [global allocator]: crate::alloc /// [aborted]: crate::process::abort /// /// # Single-thread Synchronization From 01c5f8054d92becd8c8d34fdc53194f341c4fc5d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Sun, 21 Sep 2025 16:37:43 +0200 Subject: [PATCH 4/9] Ensure set_current is called early during thread init --- library/std/src/sys/thread/hermit.rs | 34 ++++++------ library/std/src/sys/thread/solid.rs | 24 ++++---- library/std/src/sys/thread/teeos.rs | 31 ++++------- library/std/src/sys/thread/unix.rs | 30 +++++----- library/std/src/sys/thread/unsupported.rs | 7 +-- library/std/src/sys/thread/wasip1.rs | 27 ++++----- library/std/src/sys/thread/windows.rs | 31 +++++------ library/std/src/sys/thread/xous.rs | 21 +++---- library/std/src/thread/mod.rs | 67 +++++++++++++++-------- 9 files changed, 136 insertions(+), 136 deletions(-) diff --git a/library/std/src/sys/thread/hermit.rs b/library/std/src/sys/thread/hermit.rs index 4d9f3b114c2a0..faeaa9ae2dfcc 100644 --- a/library/std/src/sys/thread/hermit.rs +++ b/library/std/src/sys/thread/hermit.rs @@ -1,4 +1,5 @@ use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -16,14 +17,14 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20; impl Thread { pub unsafe fn new_with_coreid( stack: usize, - p: Box, + init: Box, core_id: isize, ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + let data = Box::into_raw(init); let tid = unsafe { hermit_abi::spawn2( 
thread_start, - p.expose_provenance(), + data.expose_provenance(), hermit_abi::Priority::into(hermit_abi::NORMAL_PRIO), stack, core_id, @@ -31,35 +32,34 @@ impl Thread { }; return if tid == 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::const_error!(io::ErrorKind::Uncategorized, "unable to create thread!")) } else { Ok(Thread { tid }) }; - extern "C" fn thread_start(main: usize) { - unsafe { - // Finally, let's run some code. - Box::from_raw(ptr::with_exposed_provenance::>(main).cast_mut())(); + extern "C" fn thread_start(data: usize) { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = + unsafe { Box::from_raw(ptr::with_exposed_provenance_mut::(data)) }; + let rust_start = init.init(); + rust_start(); - // run all destructors + // Run all destructors. 
+ unsafe { crate::sys::thread_local::destructors::run(); - crate::rt::thread_cleanup(); } + crate::rt::thread_cleanup(); } } - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { unsafe { - Thread::new_with_coreid(stack, p, -1 /* = no specific core */) + Thread::new_with_coreid(stack, init, -1 /* = no specific core */) } } diff --git a/library/std/src/sys/thread/solid.rs b/library/std/src/sys/thread/solid.rs index 46a84faa80225..5953c0e7b6129 100644 --- a/library/std/src/sys/thread/solid.rs +++ b/library/std/src/sys/thread/solid.rs @@ -8,6 +8,7 @@ use crate::sync::atomic::{Atomic, AtomicUsize, Ordering}; use crate::sys::pal::itron::error::{ItronError, expect_success, expect_success_aborting}; use crate::sys::pal::itron::time::dur2reltims; use crate::sys::pal::itron::{abi, task}; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{hint, io}; @@ -27,9 +28,9 @@ unsafe impl Sync for Thread {} /// State data shared between a parent thread and child thread. It's dropped on /// a transition to one of the final states. struct ThreadInner { - /// This field is used on thread creation to pass a closure from + /// This field is used on thread creation to pass initialization data from /// `Thread::new` to the created task. - start: UnsafeCell>>, + init: UnsafeCell>>, /// A state machine. Each transition is annotated with `[...]` in the /// source code. @@ -65,7 +66,7 @@ struct ThreadInner { lifecycle: Atomic, } -// Safety: The only `!Sync` field, `ThreadInner::start`, is only touched by +// Safety: The only `!Sync` field, `ThreadInner::init`, is only touched by // the task represented by `ThreadInner`. unsafe impl Sync for ThreadInner {} @@ -84,13 +85,9 @@ impl Thread { /// # Safety /// /// See `thread::Builder::spawn_unchecked` for safety requirements. 
- pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { let inner = Box::new(ThreadInner { - start: UnsafeCell::new(ManuallyDrop::new(p)), + init: UnsafeCell::new(ManuallyDrop::new(init)), lifecycle: AtomicUsize::new(LIFECYCLE_INIT), }); @@ -100,10 +97,11 @@ impl Thread { let inner = unsafe { &*p_inner }; // Safety: Since `trampoline` is called only once for each - // `ThreadInner` and only `trampoline` touches `start`, - // `start` contains contents and is safe to mutably borrow. - let p = unsafe { ManuallyDrop::take(&mut *inner.start.get()) }; - p(); + // `ThreadInner` and only `trampoline` touches `init`, + // `init` contains contents and is safe to mutably borrow. + let init = unsafe { ManuallyDrop::take(&mut *inner.init.get()) }; + let rust_start = init.init(); + rust_start(); // Fix the current thread's state just in case, so that the // destructors won't abort diff --git a/library/std/src/sys/thread/teeos.rs b/library/std/src/sys/thread/teeos.rs index cad100395c9f8..5e71f757eaa4b 100644 --- a/library/std/src/sys/thread/teeos.rs +++ b/library/std/src/sys/thread/teeos.rs @@ -1,5 +1,6 @@ use crate::mem::{self, ManuallyDrop}; use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{cmp, io, ptr}; @@ -24,12 +25,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -62,16 +59,16 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, 
thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. - drop(unsafe { Box::from_raw(p) }); + drop(unsafe { Box::from_raw(data) }); Err(io::Error::from_raw_os_error(ret)) } else { // The new thread will start running earliest after the next yield. @@ -80,15 +77,11 @@ impl Thread { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. - // this is not necessary in TEE. - //let _handler = stack_overflow::Handler::new(); - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/unix.rs b/library/std/src/sys/thread/unix.rs index 2d2c4f9021288..21f362edced03 100644 --- a/library/std/src/sys/thread/unix.rs +++ b/library/std/src/sys/thread/unix.rs @@ -14,6 +14,7 @@ use crate::sys::weak::dlsym; #[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))] use crate::sys::weak::weak; use crate::sys::{os, stack_overflow}; +use crate::thread::{ThreadInit, current}; use crate::time::Duration; use crate::{cmp, io, ptr}; #[cfg(not(any( @@ -30,11 +31,6 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024; #[cfg(any(target_os = "espidf", target_os = "nuttx"))] pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used -struct ThreadData { - name: Option>, - f: Box, -} - pub struct Thread { id: libc::pthread_t, } @@ -47,12 +43,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - name: Option<&str>, - f: Box, - ) -> io::Result { - let data = Box::into_raw(Box::new(ThreadData { name: name.map(Box::from), f })); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = mem::zeroed(); let mut attr: mem::MaybeUninit = mem::MaybeUninit::uninit(); assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0); @@ -118,12 +110,16 @@ impl Thread { extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { unsafe { - let data = Box::from_raw(data as *mut ThreadData); - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. 
- let _handler = stack_overflow::Handler::new(data.name); - // Finally, let's run some code. - (data.f)(); + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = Box::from_raw(data as *mut ThreadInit); + let rust_start = init.init(); + + // Set up our thread name and stack overflow handler which may get triggered + // if we run out of stack. + let thread = current(); + let _handler = stack_overflow::Handler::new(thread.name().map(Box::from)); + + rust_start(); } ptr::null_mut() } diff --git a/library/std/src/sys/thread/unsupported.rs b/library/std/src/sys/thread/unsupported.rs index a5001efa3b405..2d8524f825499 100644 --- a/library/std/src/sys/thread/unsupported.rs +++ b/library/std/src/sys/thread/unsupported.rs @@ -1,6 +1,7 @@ use crate::ffi::CStr; use crate::io; use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(!); @@ -9,11 +10,7 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - _p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, _init: Box) -> io::Result { Err(io::Error::UNSUPPORTED_PLATFORM) } diff --git a/library/std/src/sys/thread/wasip1.rs b/library/std/src/sys/thread/wasip1.rs index 83001fad49c81..6932691b74390 100644 --- a/library/std/src/sys/thread/wasip1.rs +++ b/library/std/src/sys/thread/wasip1.rs @@ -7,6 +7,7 @@ use crate::mem; use crate::num::NonZero; #[cfg(target_feature = "atomics")] use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; #[cfg(target_feature = "atomics")] use crate::{cmp, ptr}; @@ -73,12 +74,8 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024; #[cfg(target_feature = "atomics")] impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p 
= Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -100,28 +97,28 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::Error::from_raw_os_error(ret)) } else { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/windows.rs b/library/std/src/sys/thread/windows.rs index a5640c51c4a5d..1ef496a20cfe4 100644 --- a/library/std/src/sys/thread/windows.rs +++ b/library/std/src/sys/thread/windows.rs @@ -8,6 +8,7 @@ use crate::sys::pal::time::WaitableTimer; use crate::sys::pal::{dur2timeout, to_u16s}; use crate::sys::{c, stack_overflow}; use crate::sys_common::FromInner; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -20,23 +21,19 @@ pub struct Thread { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); // CreateThread rounds up values for the stack size to the nearest page size (at least 4kb). // If a value of zero is given then the default stack size is used instead. // SAFETY: `thread_start` has the right ABI for a thread's entry point. - // `p` is simply passed through to the new thread without being touched. + // `data` is simply passed through to the new thread without being touched. let ret = unsafe { let ret = c::CreateThread( ptr::null_mut(), stack, Some(thread_start), - p as *mut _, + data as *mut _, c::STACK_SIZE_PARAM_IS_A_RESERVATION, ptr::null_mut(), ); @@ -45,19 +42,21 @@ impl Thread { return if let Ok(handle) = ret.try_into() { Ok(Thread { handle: Handle::from_inner(handle) }) } else { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. 
- unsafe { drop(Box::from_raw(p)) }; + unsafe { drop(Box::from_raw(data)) }; Err(io::Error::last_os_error()) }; - unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 { - // Next, reserve some stack space for if we otherwise run out of stack. + unsafe extern "system" fn thread_start(data: *mut c_void) -> u32 { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + + // Reserve some stack space for if we otherwise run out of stack. stack_overflow::reserve_stack(); - // Finally, let's run some code. - // SAFETY: We are simply recreating the box that was leaked earlier. - // It's the responsibility of the one who call `Thread::new` to ensure this is safe to call here. - unsafe { Box::from_raw(main as *mut Box)() }; + + rust_start(); 0 } } diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs index 133e15a0928c6..28f751337a235 100644 --- a/library/std/src/sys/thread/xous.rs +++ b/library/std/src/sys/thread/xous.rs @@ -7,6 +7,7 @@ use crate::os::xous::ffi::{ map_memory, update_memory_flags, }; use crate::os::xous::services::{TicktimerScalar, ticktimer_server}; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread { @@ -19,12 +20,8 @@ pub const GUARD_PAGE_SIZE: usize = 4096; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut stack_size = crate::cmp::max(stack, MIN_STACK_SIZE); if (stack_size & 4095) != 0 { @@ -65,7 +62,7 @@ impl Thread { let tid = create_thread( thread_start as *mut usize, &mut stack_plus_guard_pages[GUARD_PAGE_SIZE..(stack_size + GUARD_PAGE_SIZE)], - p as usize, + data as usize, guard_page_pre, stack_size, 0, @@ 
-73,14 +70,14 @@ impl Thread { .map_err(|code| io::Error::from_raw_os_error(code as i32))?; extern "C" fn thread_start( - main: *mut usize, + data: *mut usize, guard_page_pre: usize, stack_size: usize, ) -> ! { - unsafe { - // Run the contents of the new thread. - Box::from_raw(main as *mut Box)(); - } + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); // Destroy TLS, which will free the TLS page and call the destructor for // any thread local storage (if any). diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index adac9823ccd1c..2cb0aac2a77cd 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -212,6 +212,36 @@ pub mod local_impl { pub use crate::sys::thread_local::*; } +/// The data passed to the spawned thread for thread initialization. Any thread +/// implementation should start a new thread by calling .init() on this before +/// doing anything else to ensure the current thread is properly initialized and +/// the global allocator works. +pub(crate) struct ThreadInit { + pub handle: Thread, + pub rust_start: Box, +} + +impl ThreadInit { + /// Initialize the 'current thread' mechanism on this thread, returning the + /// Rust entry point. + pub fn init(self: Box) -> Box { + // Set the current thread before any (de)allocations on the global allocator occur, + // so that it may call std::thread::current() in its implementation. This is also + // why we take Box, to ensure the Box is not destroyed until after this point. + // Cloning the handle does not invoke the global allocator, it is an Arc. + if let Err(_thread) = set_current(self.handle.clone()) { + // The current thread should not have set yet. Use an abort to save binary size (see #123356). 
+ rtabort!("current thread handle already set during thread spawn"); + } + + if let Some(name) = self.handle.cname() { + imp::set_name(name); + } + + self.rust_start + } +} + //////////////////////////////////////////////////////////////////////////////// // Builder //////////////////////////////////////////////////////////////////////////////// @@ -503,16 +533,14 @@ impl Builder { }); let id = ThreadId::new(); - let my_thread = Thread::new(id, name); + let thread = Thread::new(id, name); let hooks = if no_hooks { spawnhook::ChildSpawnHooks::default() } else { - spawnhook::run_spawn_hooks(&my_thread) + spawnhook::run_spawn_hooks(&thread) }; - let their_thread = my_thread.clone(); - let my_packet: Arc> = Arc::new(Packet { scope: scope_data, result: UnsafeCell::new(None), @@ -544,19 +572,10 @@ impl Builder { } let f = MaybeDangling::new(f); - let main = move || { - if let Err(_thread) = set_current(their_thread.clone()) { - // Both the current thread handle and the ID should not be - // initialized yet. Since only the C runtime and some of our - // platform code run before this, this point shouldn't be - // reachable. Use an abort to save binary size (see #123356). - rtabort!("something here is badly broken!"); - } - - if let Some(name) = their_thread.cname() { - imp::set_name(name); - } + // The entrypoint of the Rust thread, after platform-specific thread + // initialization is done. + let rust_start = move || { let f = f.into_inner(); let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run()); @@ -579,11 +598,15 @@ impl Builder { scope_data.increment_num_running_threads(); } - let main = Box::new(main); // SAFETY: dynamic size and alignment of the Box remain the same. See below for why the // lifetime change is justified. 
- let main = - unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + Send + 'static)) }; + let rust_start = unsafe { + Box::from_raw( + Box::into_raw(Box::new(rust_start)) as *mut (dyn FnOnce() + Send + 'static) + ) + }; + + let init = Box::new(ThreadInit { handle: thread.clone(), rust_start }); Ok(JoinInner { // SAFETY: @@ -599,8 +622,8 @@ impl Builder { // Similarly, the `sys` implementation must guarantee that no references to the closure // exist after the thread has terminated, which is signaled by `Thread::join` // returning. - native: unsafe { imp::Thread::new(stack_size, my_thread.name(), main)? }, - thread: my_thread, + native: unsafe { imp::Thread::new(stack_size, init)? }, + thread, packet: my_packet, }) } @@ -1705,7 +1728,7 @@ impl Thread { } } - fn cname(&self) -> Option<&CStr> { + pub(crate) fn cname(&self) -> Option<&CStr> { if let Some(name) = &self.inner.name { Some(name.as_cstr()) } else if main_thread::get() == Some(self.inner.id) { From f3270b9d569cbf7ed1c620d16a7f8d95fdcf299f Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 1 Oct 2025 15:01:20 +0200 Subject: [PATCH 5/9] Address review comments --- .../src/sys/thread_local/destructors/list.rs | 4 +++- library/std/src/thread/current.rs | 22 +++++-------------- library/std/src/thread/mod.rs | 21 ++++++++---------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs index 606694cc78d79..44e00c8a5ae59 100644 --- a/library/std/src/sys/thread_local/destructors/list.rs +++ b/library/std/src/sys/thread_local/destructors/list.rs @@ -7,7 +7,9 @@ static DTORS: RefCell> = RefCell::new(Vec::new_in(System)); pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) { - let Ok(mut dtors) = DTORS.try_borrow_mut() else { rtabort!("unreachable") }; + let Ok(mut dtors) = DTORS.try_borrow_mut() else { + rtabort!("the System allocator may not use TLS with 
destructors") + }; guard::enable(); dtors.push((t, dtor)); } diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index c4619a80021a6..ea0c6c7229fe8 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -269,23 +269,13 @@ fn init_current(current: *mut ()) -> Thread { // BUSY exists solely for this check, but as it is in the slow path, the // extra TLS write above shouldn't matter. The alternative is nearly always // a stack overflow. - - // If you came across this message, contact the author of your - // allocator. If you are said author: A surprising amount of functions - // inside the standard library (e.g. `Mutex`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, - // so you'll get circular dependencies all over the place when using - // them. I (joboet) highly recommend using only APIs from core in your - // allocator and implementing your own system abstractions. Still, if - // you feel that a particular API should be entirely allocation-free, - // feel free to open an issue on the Rust repository, we'll see what we - // can do. + // + // If we reach this point it means our initialization routine ended up + // calling current() either directly, or indirectly through the global + // allocator, which is a bug either way as we may not call the global + // allocator in current(). 
rtabort!( - "\n\ - Attempted to access thread-local data while allocating said data.\n\ - Do not access functions that allocate in the global allocator!\n\ - This is a bug in the global allocator.\n\ - " + "init_current() was re-entrant, which indicates a bug in the Rust threading implementation" ) } else { debug_assert_eq!(current, DESTROYED); diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 2cb0aac2a77cd..fff07136b8d80 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1306,20 +1306,17 @@ impl ThreadId { spin += 1; } - let id; // SAFETY: we have an exclusive lock on the counter. unsafe { - id = (*COUNTER.get()).saturating_add(1); - (*COUNTER.get()) = id; - }; - - // Release the lock. - COUNTER_LOCKED.store(false, Ordering::Release); - - if id == u64::MAX { - exhausted() - } else { - ThreadId(NonZero::new(id).unwrap()) + if *COUNTER.get() == u64::MAX { + COUNTER_LOCKED.store(false, Ordering::Release); + exhausted() + } else { + let id = *COUNTER.get() + 1; + *COUNTER.get() = id; + COUNTER_LOCKED.store(false, Ordering::Release); + ThreadId(NonZero::new(id).unwrap()) + } } } } From 035c0c92d6fe5a7072acda5af98a3c4b8a541490 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Wed, 1 Oct 2025 15:06:30 +0200 Subject: [PATCH 6/9] Use checked_add instead of manual overflow check --- library/std/src/thread/mod.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index fff07136b8d80..568acfbd1df2f 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -1308,14 +1308,13 @@ impl ThreadId { // SAFETY: we have an exclusive lock on the counter. 
unsafe { - if *COUNTER.get() == u64::MAX { - COUNTER_LOCKED.store(false, Ordering::Release); - exhausted() - } else { - let id = *COUNTER.get() + 1; + if let Some(id) = (*COUNTER.get()).checked_add(1) { *COUNTER.get() = id; COUNTER_LOCKED.store(false, Ordering::Release); ThreadId(NonZero::new(id).unwrap()) + } else { + COUNTER_LOCKED.store(false, Ordering::Release); + exhausted() } } } From 4b85f274b889411c59c69cfe7b9bfe9ecd270b3c Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 00:52:18 +0200 Subject: [PATCH 7/9] Fix SGX implementation --- library/std/src/sys/pal/sgx/abi/mod.rs | 5 ++++- library/std/src/sys/pal/sgx/abi/tls/mod.rs | 7 ------- library/std/src/sys/thread/sgx.rs | 19 +++++++++---------- library/std/src/thread/mod.rs | 4 ++-- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/library/std/src/sys/pal/sgx/abi/mod.rs b/library/std/src/sys/pal/sgx/abi/mod.rs index b8c4d7740c4e1..1c6c681d4c179 100644 --- a/library/std/src/sys/pal/sgx/abi/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/mod.rs @@ -3,6 +3,7 @@ use core::arch::global_asm; use core::sync::atomic::{Atomic, AtomicUsize, Ordering}; +use crate::alloc::System; use crate::io::Write; // runtime features @@ -63,7 +64,9 @@ unsafe extern "C" fn tcs_init(secondary: bool) { #[unsafe(no_mangle)] extern "C" fn entry(p1: u64, p2: u64, p3: u64, secondary: bool, p4: u64, p5: u64) -> EntryReturn { // FIXME: how to support TLS in library mode? - let tls = Box::new(tls::Tls::new()); + // We use the System allocator here such that the global allocator may use + // thread-locals. 
+ let tls = Box::new_in(tls::Tls::new(), System); let tls_guard = unsafe { tls.activate() }; if secondary { diff --git a/library/std/src/sys/pal/sgx/abi/tls/mod.rs b/library/std/src/sys/pal/sgx/abi/tls/mod.rs index 41e38b6961680..553814dcb5fda 100644 --- a/library/std/src/sys/pal/sgx/abi/tls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/tls/mod.rs @@ -89,13 +89,6 @@ impl Tls { ActiveTls { tls: self } } - #[allow(unused)] - pub unsafe fn activate_persistent(self: Box) { - // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. - let ptr = Box::into_raw(self).cast_const().cast::(); - unsafe { set_tls_ptr(ptr) }; - } - unsafe fn current<'a>() -> &'a Tls { // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. unsafe { &*(get_tls_ptr() as *const Tls) } diff --git a/library/std/src/sys/thread/sgx.rs b/library/std/src/sys/thread/sgx.rs index f20ef7d86b9c7..9e6dcfa16713d 100644 --- a/library/std/src/sys/thread/sgx.rs +++ b/library/std/src/sys/thread/sgx.rs @@ -2,6 +2,7 @@ use crate::io; use crate::sys::pal::abi::{thread, usercalls}; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(task_queue::JoinHandle); @@ -13,6 +14,7 @@ pub use self::task_queue::JoinNotifier; mod task_queue { use super::wait_notify; use crate::sync::{Mutex, MutexGuard}; + use crate::thread::ThreadInit; pub type JoinHandle = wait_notify::Waiter; @@ -25,19 +27,20 @@ mod task_queue { } pub(super) struct Task { - p: Box, + init: Box, done: JoinNotifier, } impl Task { - pub(super) fn new(p: Box) -> (Task, JoinHandle) { + pub(super) fn new(init: Box) -> (Task, JoinHandle) { let (done, recv) = wait_notify::new(); let done = JoinNotifier(Some(done)); - (Task { p, done }, recv) + (Task { init, done }, recv) } pub(super) fn run(self) -> JoinNotifier { - (self.p)(); + let rust_start = self.init.init(); + rust_start(); self.done } } @@ -93,14 +96,10 @@ pub mod wait_notify { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for 
safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, init: Box) -> io::Result { let mut queue_lock = task_queue::lock(); unsafe { usercalls::launch_thread()? }; - let (task, handle) = task_queue::Task::new(p); + let (task, handle) = task_queue::Task::new(init); queue_lock.push(task); Ok(Thread(handle)) } diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs index 568acfbd1df2f..99a2214466a5c 100644 --- a/library/std/src/thread/mod.rs +++ b/library/std/src/thread/mod.rs @@ -218,13 +218,13 @@ pub mod local_impl { /// the global allocator works. pub(crate) struct ThreadInit { pub handle: Thread, - pub rust_start: Box, + pub rust_start: Box, } impl ThreadInit { /// Initialize the 'current thread' mechanism on this thread, returning the /// Rust entry point. - pub fn init(self: Box) -> Box { + pub fn init(self: Box) -> Box { // Set the current thread before any (de)allocations on the global allocator occur, // so that it may call std::thread::current() in its implementation. This is also // why we take Box, to ensure the Box is not destroyed until after this point. 
From 64af367328972ac18762aa83e934d7b2e6bc434d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 01:04:49 +0200 Subject: [PATCH 8/9] Add inline(never) to prevent reordering after TLS has been destroyed --- library/std/src/sys/thread/xous.rs | 14 ++++++++++++-- library/std/src/sys/thread_local/key/xous.rs | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs index 28f751337a235..6c2cdfa4acddf 100644 --- a/library/std/src/sys/thread/xous.rs +++ b/library/std/src/sys/thread/xous.rs @@ -69,6 +69,12 @@ impl Thread { ) .map_err(|code| io::Error::from_raw_os_error(code as i32))?; + #[inline(never)] + fn run_main_thread_not_inlined(init: Box) { + let rust_start = init.init(); + rust_start(); + } + extern "C" fn thread_start( data: *mut usize, guard_page_pre: usize, @@ -76,8 +82,12 @@ impl Thread { ) -> ! { // SAFETY: we are simply recreating the box that was leaked earlier. let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; - let rust_start = init.init(); - rust_start(); + + // Run the main thread with an inline(never) barrier to prevent + // dealloc calls from being reordered to after the TLS has been destroyed. + // See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950 + // for more context. + run_main_thread_not_inlined(init); // Destroy TLS, which will free the TLS page and call the destructor for // any thread local storage (if any). diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index 529178f34757b..db83d2bf4a13a 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -186,6 +186,11 @@ pub unsafe fn destroy_tls() { }; } +// This is marked inline(never) to prevent dealloc calls from being reordered +// to after the TLS has been destroyed. 
+// See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950 +// for more context. +#[inline(never)] unsafe fn run_dtors() { let mut any_run = true; From 8e073905444776a524328995c42e6ccf27193ecb Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 3 Oct 2025 01:34:09 +0200 Subject: [PATCH 9/9] Expand test with allocating from thread-local destructors --- .../threads-sendsync/tls-in-global-alloc.rs | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs index 82a96ce9a6c3a..424e43ebccca0 100644 --- a/tests/ui/threads-sendsync/tls-in-global-alloc.rs +++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs @@ -2,27 +2,46 @@ //@ needs-threads use std::alloc::{GlobalAlloc, Layout, System}; -use std::thread::Thread; -use std::sync::atomic::{AtomicUsize, AtomicBool, Ordering}; +use std::hint::black_box; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::thread::{Thread, ThreadId}; static GLOBAL: AtomicUsize = AtomicUsize::new(0); +static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); +static LOCAL_TRY_WITH_SUCCEEDED_ALLOC: AtomicBool = AtomicBool::new(false); +static LOCAL_TRY_WITH_SUCCEEDED_DEALLOC: AtomicBool = AtomicBool::new(false); -struct Local(Thread); +struct LocalForAllocatorWithoutDrop(ThreadId); -thread_local! { - static LOCAL: Local = { - GLOBAL.fetch_or(1, Ordering::Relaxed); - Local(std::thread::current()) - }; -} +struct LocalForAllocatorWithDrop(Thread); -impl Drop for Local { +impl Drop for LocalForAllocatorWithDrop { fn drop(&mut self) { GLOBAL.fetch_or(2, Ordering::Relaxed); } } -static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false); +struct LocalForUser(u32); + +impl Drop for LocalForUser { + // A user might call the global allocator in a thread-local drop. 
+ fn drop(&mut self) { + self.0 += 1; + drop(black_box(Box::new(self.0))) + } +} + +thread_local! { + static LOCAL_FOR_USER0: LocalForUser = LocalForUser(0); + static LOCAL_FOR_ALLOCATOR_WITHOUT_DROP: LocalForAllocatorWithoutDrop = { + LocalForAllocatorWithoutDrop(std::thread::current().id()) + }; + static LOCAL_FOR_ALLOCATOR_WITH_DROP: LocalForAllocatorWithDrop = { + GLOBAL.fetch_or(1, Ordering::Relaxed); + LocalForAllocatorWithDrop(std::thread::current()) + }; + static LOCAL_FOR_USER1: LocalForUser = LocalForUser(1); +} #[global_allocator] static ALLOC: Alloc = Alloc; @@ -33,9 +52,19 @@ unsafe impl GlobalAlloc for Alloc { // Make sure we aren't re-entrant. assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); - LOCAL.with(|local| { + + // Should be infallible. + LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| { + assert!(local.0 == std::thread::current().id()); + }); + + // May fail once thread-local destructors start running, and ours has + // been ran. + let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| { assert!(local.0.id() == std::thread::current().id()); }); + LOCAL_TRY_WITH_SUCCEEDED_ALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed); + let ret = unsafe { System.alloc(layout) }; SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); ret @@ -45,9 +74,19 @@ unsafe impl GlobalAlloc for Alloc { // Make sure we aren't re-entrant. assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed)); SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed); - LOCAL.with(|local| { + + // Should be infallible. + LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| { + assert!(local.0 == std::thread::current().id()); + }); + + // May fail once thread-local destructors start running, and ours has + // been ran. 
+ let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| { assert!(local.0.id() == std::thread::current().id()); }); + LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed); + unsafe { System.dealloc(ptr, layout) } SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); } @@ -55,10 +94,14 @@ unsafe impl GlobalAlloc for Alloc { fn main() { std::thread::spawn(|| { + LOCAL_FOR_USER0.with(|l| assert!(l.0 == 0)); std::hint::black_box(vec![1, 2]); assert!(GLOBAL.load(Ordering::Relaxed) == 1); + LOCAL_FOR_USER1.with(|l| assert!(l.0 == 1)); }) .join() .unwrap(); assert!(GLOBAL.load(Ordering::Relaxed) == 3); + assert!(LOCAL_TRY_WITH_SUCCEEDED_ALLOC.load(Ordering::Relaxed)); + assert!(LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.load(Ordering::Relaxed)); }