|
7 | 7 | //! `GetModuleHandle` and `GetProcAddress` to look up DLL entry points at |
8 | 8 | //! runtime. |
9 | 9 | //! |
10 | | -//! This implementation uses a static initializer to look up the DLL entry |
11 | | -//! points. The CRT (C runtime) executes static initializers before `main` |
12 | | -//! is called (for binaries) and before `DllMain` is called (for DLLs). |
13 | | -//! This is the ideal time to look up DLL imports, because we are guaranteed |
14 | | -//! that no other threads will attempt to call these entry points. Thus, |
15 | | -//! we can look up the imports and store them in `static mut` fields |
16 | | -//! without any synchronization. |
| 10 | +//! This is implemented simply by storing a function pointer in an atomic. |
| 11 | +//! Loading and calling this function will have little or no overhead |
| 12 | +//! compared with calling any other dynamically imported function. |
17 | 13 | //! |
18 | | -//! This has an additional advantage: Because the DLL import lookup happens |
19 | | -//! at module initialization, the cost of these lookups is deterministic, |
20 | | -//! and is removed from the code paths that actually call the DLL imports. |
21 | | -//! That is, there is no unpredictable "cache miss" that occurs when calling |
22 | | -//! a DLL import. For applications that benefit from predictable delays, |
23 | | -//! this is a benefit. This also eliminates the comparison-and-branch |
24 | | -//! from the hot path. |
25 | | -//! |
26 | | -//! Currently, the standard library uses only a small number of dynamic |
27 | | -//! DLL imports. If this number grows substantially, then the cost of |
28 | | -//! performing all of the lookups at initialization time might become |
29 | | -//! substantial. |
30 | | -//! |
31 | | -//! The mechanism of registering a static initializer with the CRT is |
32 | | -//! documented in |
33 | | -//! [CRT Initialization](https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization?view=msvc-160). |
34 | | -//! It works by contributing a global symbol to the `.CRT$XCU` section. |
35 | | -//! The linker builds a table of all static initializer functions. |
36 | | -//! The CRT startup code then iterates that table, calling each |
37 | | -//! initializer function. |
38 | | -//! |
39 | | -//! # **WARNING!!** |
40 | | -//! The environment that a static initializer function runs in is highly |
41 | | -//! constrained. There are **many** restrictions on what static initializers |
42 | | -//! can safely do. Static initializer functions **MUST NOT** do any of the |
43 | | -//! following (this list is not comprehensive): |
44 | | -//! * touch any other static field that is used by a different static |
45 | | -//! initializer, because the order that static initializers run in |
46 | | -//! is not defined. |
47 | | -//! * call `LoadLibrary` or any other function that acquires the DLL |
48 | | -//! loader lock. |
49 | | -//! * call any Rust function or CRT function that touches any static |
50 | | -//! (global) state. |
| 14 | +//! The stored function pointer starts out as an importer function which will |
| 15 | +//! swap itself with the real function when it's called for the first time. If |
| 16 | +//! the real function can't be imported then a fallback function is used in its |
| 17 | +//! place. While this is low cost for the happy path (where the function is |
| 18 | +//! already loaded) it does mean there's some overhead the first time the |
| 19 | +//! function is called. In the worst case, multiple threads may all end up |
| 20 | +//! importing the same function unnecessarily. |
51 | 21 |
|
52 | 22 | use crate::ffi::{c_void, CStr}; |
53 | 23 | use crate::ptr::NonNull; |
@@ -85,39 +55,6 @@ pub(crate) const fn const_cstr_from_bytes(bytes: &'static [u8]) -> &'static CStr |
85 | 55 | unsafe { crate::ffi::CStr::from_bytes_with_nul_unchecked(bytes) } |
86 | 56 | } |
87 | 57 |
|
88 | | -#[used] |
89 | | -#[link_section = ".CRT$XCU"] |
90 | | -static INIT_TABLE_ENTRY: unsafe extern "C" fn() = init; |
91 | | - |
92 | | -/// This is where the magic preloading of symbols happens. |
93 | | -/// |
94 | | -/// Note that any functions included here will be unconditionally included in |
95 | | -/// the final binary, regardless of whether or not they're actually used. |
96 | | -/// |
97 | | -/// Therefore, this is limited to `compat_fn_optional` functions which must be |
98 | | -/// preloaded and any functions which may be more time sensitive, even for the first call. |
99 | | -unsafe extern "C" fn init() { |
100 | | - // There is no locking here. This code is executed before main() is entered, and |
101 | | - // is guaranteed to be single-threaded. |
102 | | - // |
103 | | - // DO NOT do anything interesting or complicated in this function! DO NOT call |
104 | | - // any Rust functions or CRT functions if those functions touch any global state, |
105 | | - // because this function runs during global initialization. For example, DO NOT |
106 | | - // do any dynamic allocation, don't call LoadLibrary, etc. |
107 | | - |
108 | | - if let Some(synch) = Module::new(c::SYNCH_API) { |
109 | | - // These are optional and so we must manually attempt to load them |
110 | | - // before they can be used. |
111 | | - c::WaitOnAddress::preload(synch); |
112 | | - c::WakeByAddressSingle::preload(synch); |
113 | | - } |
114 | | - |
115 | | - if let Some(kernel32) = Module::new(c::KERNEL32) { |
116 | | - // Preloading this means getting a precise time will be as fast as possible. |
117 | | - c::GetSystemTimePreciseAsFileTime::preload(kernel32); |
118 | | - } |
119 | | -} |
120 | | - |
121 | 58 | /// Represents a loaded module. |
122 | 59 | /// |
123 | 60 | /// Note that the modules std depends on must not be unloaded. |
@@ -151,7 +88,7 @@ impl Module { |
151 | 88 | macro_rules! compat_fn_with_fallback { |
152 | 89 | (pub static $module:ident: &CStr = $name:expr; $( |
153 | 90 | $(#[$meta:meta])* |
154 | | - pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $fallback_body:block |
| 91 | + $vis:vis fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty $fallback_body:block |
155 | 92 | )*) => ( |
156 | 93 | pub static $module: &CStr = $name; |
157 | 94 | $( |
@@ -196,78 +133,72 @@ macro_rules! compat_fn_with_fallback { |
196 | 133 | $fallback_body |
197 | 134 | } |
198 | 135 |
|
199 | | - #[allow(unused)] |
200 | | - pub(in crate::sys) fn preload(module: Module) { |
201 | | - load_from_module(Some(module)); |
202 | | - } |
203 | | - |
204 | 136 | #[inline(always)] |
205 | 137 | pub unsafe fn call($($argname: $argtype),*) -> $rettype { |
206 | 138 | let func: F = mem::transmute(PTR.load(Ordering::Relaxed)); |
207 | 139 | func($($argname),*) |
208 | 140 | } |
209 | 141 | } |
210 | 142 | $(#[$meta])* |
211 | | - pub use $symbol::call as $symbol; |
| 143 | + $vis use $symbol::call as $symbol; |
212 | 144 | )*) |
213 | 145 | } |
214 | 146 |
|
215 | | -/// A function that either exists or doesn't. |
| 147 | +/// Optionally load `WaitOnAddress`. |
| 148 | +/// Unlike the dynamic loading described above, this does not have a fallback. |
216 | 149 | /// |
217 | | -/// NOTE: Optional functions must be preloaded in the `init` function above, or they will always be None. |
218 | | -macro_rules! compat_fn_optional { |
219 | | - (pub static $module:ident: &CStr = $name:expr; $( |
220 | | - $(#[$meta:meta])* |
221 | | - pub fn $symbol:ident($($argname:ident: $argtype:ty),*) -> $rettype:ty; |
222 | | - )*) => ( |
223 | | - pub static $module: &CStr = $name; |
224 | | - $( |
225 | | - $(#[$meta])* |
226 | | - pub mod $symbol { |
227 | | - #[allow(unused_imports)] |
228 | | - use super::*; |
229 | | - use crate::mem; |
230 | | - use crate::sync::atomic::{AtomicPtr, Ordering}; |
231 | | - use crate::sys::compat::Module; |
232 | | - use crate::ptr::{self, NonNull}; |
233 | | - |
234 | | - type F = unsafe extern "system" fn($($argtype),*) -> $rettype; |
235 | | - |
236 | | - /// `PTR` will either be `null()` or set to the loaded function. |
237 | | - static PTR: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut()); |
238 | | - |
239 | | - /// Only allow access to the function if it has loaded successfully. |
240 | | - #[inline(always)] |
241 | | - #[cfg(not(miri))] |
242 | | - pub fn option() -> Option<F> { |
243 | | - unsafe { |
244 | | - NonNull::new(PTR.load(Ordering::Relaxed)).map(|f| mem::transmute(f)) |
245 | | - } |
246 | | - } |
247 | | - |
248 | | - // Miri does not understand the way we do preloading |
249 | | - // therefore load the function here instead. |
250 | | - #[cfg(miri)] |
251 | | - pub fn option() -> Option<F> { |
252 | | - let mut func = NonNull::new(PTR.load(Ordering::Relaxed)); |
253 | | - if func.is_none() { |
254 | | - unsafe { Module::new($module).map(preload) }; |
255 | | - func = NonNull::new(PTR.load(Ordering::Relaxed)); |
256 | | - } |
257 | | - unsafe { |
258 | | - func.map(|f| mem::transmute(f)) |
259 | | - } |
260 | | - } |
| 150 | +/// This is re-exported from sys::c. You should prefer to import |
| 151 | +/// from there in case this changes again in the future. |
| 152 | +pub mod WaitOnAddress { |
| 153 | + use super::*; |
| 154 | + use crate::mem; |
| 155 | + use crate::ptr; |
| 156 | + use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; |
| 157 | + use crate::sys::c; |
| 158 | + |
| 159 | + static MODULE_NAME: &CStr = ansi_str!("api-ms-win-core-synch-l1-2-0"); |
| 160 | + static SYMBOL_NAME: &CStr = ansi_str!("WaitOnAddress"); |
| 161 | + |
| 162 | + // WaitOnAddress function signature. |
| 163 | + type F = unsafe extern "system" fn( |
| 164 | + Address: c::LPVOID, |
| 165 | + CompareAddress: c::LPVOID, |
| 166 | + AddressSize: c::SIZE_T, |
| 167 | + dwMilliseconds: c::DWORD, |
| 168 | + ); |
| 169 | + |
| 170 | + // A place to store the loaded function atomically. |
| 171 | + static WAIT_ON_ADDRESS: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut()); |
| 172 | + |
| 173 | + // We can skip trying to load again if we already tried. |
| 174 | + static LOAD_MODULE: AtomicBool = AtomicBool::new(true); |
| 175 | + |
| 176 | + #[inline(always)] |
| 177 | + pub fn option() -> Option<F> { |
| 178 | + let f = WAIT_ON_ADDRESS.load(Ordering::Acquire); |
| 179 | + if !f.is_null() { Some(unsafe { mem::transmute(f) }) } else { try_load() } |
| 180 | + } |
261 | 181 |
|
262 | | - #[allow(unused)] |
263 | | - pub(in crate::sys) fn preload(module: Module) { |
264 | | - unsafe { |
265 | | - static SYMBOL_NAME: &CStr = ansi_str!(sym $symbol); |
266 | | - if let Some(f) = module.proc_address(SYMBOL_NAME) { |
267 | | - PTR.store(f.as_ptr(), Ordering::Relaxed); |
268 | | - } |
269 | | - } |
| 182 | + #[cold] |
| 183 | + fn try_load() -> Option<F> { |
| 184 | + if LOAD_MODULE.load(Ordering::Acquire) { |
| 185 | + // load the module |
| 186 | + let mut wait_on_address = None; |
| 187 | + if let Some(func) = try_load_inner() { |
| 188 | + WAIT_ON_ADDRESS.store(func.as_ptr(), Ordering::Release); |
| 189 | + wait_on_address = Some(unsafe { mem::transmute(func) }); |
270 | 190 | } |
| 191 | + // Don't try to load the module again even if loading failed. |
| 192 | + LOAD_MODULE.store(false, Ordering::Release); |
| 193 | + wait_on_address |
| 194 | + } else { |
| 195 | + None |
271 | 196 | } |
272 | | - )*) |
| 197 | + } |
| 198 | + |
| 199 | + // In the future this could be a `try` block but until then I think it's a |
| 200 | + // little bit cleaner as a separate function. |
| 201 | + fn try_load_inner() -> Option<NonNull<c_void>> { |
| 202 | + unsafe { Module::new(MODULE_NAME)?.proc_address(SYMBOL_NAME) } |
| 203 | + } |
273 | 204 | } |
0 commit comments