|
1 | 1 | #[cfg(test)] |
2 | 2 | const MAX_COMPARE_SIZE: usize = 256; |
3 | 3 |
|
| 4 | +#[inline(always)] |
4 | 5 | pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize { |
5 | 6 | let src0 = first_chunk::<_, 256>(src0).unwrap(); |
6 | 7 | let src1 = first_chunk::<_, 256>(src1).unwrap(); |
7 | 8 |
|
8 | 9 | compare256(src0, src1) |
9 | 10 | } |
10 | 11 |
|
| 12 | +#[inline(always)] |
11 | 13 | fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize { |
12 | | - #[cfg(target_arch = "x86_64")] |
13 | | - if crate::cpu_features::is_enabled_avx2() { |
14 | | - return unsafe { avx2::compare256(src0, src1) }; |
15 | | - } |
| 14 | + #[cfg(target_feature = "avx2")] |
| 15 | + return avx2::compare256(src0, src1); |
16 | 16 |
|
17 | | - #[cfg(target_arch = "aarch64")] |
18 | | - if crate::cpu_features::is_enabled_neon() { |
19 | | - return unsafe { neon::compare256(src0, src1) }; |
20 | | - } |
| 17 | + #[cfg(target_feature = "neon")] |
| 18 | + return neon::compare256(src0, src1); |
| 19 | + |
| 20 | + #[cfg(target_feature = "simd128")] |
| 21 | + return wasm32::compare256(src0, src1); |
| 22 | + |
| 23 | + #[allow(unreachable_code)] |
| 24 | + compare256_via_function_pointer(src0, src1) |
| 25 | +} |
| 26 | + |
| 27 | +#[inline(always)] |
| 28 | +fn compare256_via_function_pointer(src0: &[u8; 256], src1: &[u8; 256]) -> usize { |
| 29 | + use core::sync::atomic::{AtomicPtr, Ordering}; |
21 | 30 |
|
22 | | - #[cfg(target_arch = "wasm32")] |
23 | | - if crate::cpu_features::is_enabled_simd128() { |
24 | | - return wasm32::compare256(src0, src1); |
| 31 | + type F = unsafe fn(&[u8; 256], &[u8; 256]) -> usize; |
| 32 | + |
| 33 | + static PTR: AtomicPtr<()> = AtomicPtr::new(initializer as *mut ()); |
| 34 | + |
| 35 | + fn initializer(src0: &[u8; 256], src1: &[u8; 256]) -> usize { |
| 36 | + let ptr = match () { |
| 37 | + #[cfg(target_arch = "x86_64")] |
| 38 | + _ if crate::cpu_features::is_enabled_avx2() => avx2::compare256 as F, |
| 39 | + #[cfg(target_arch = "aarch64")] |
| 40 | + _ if crate::cpu_features::is_enabled_neon() => neon::compare256 as F, |
| 41 | + #[cfg(target_arch = "wasm32")] |
| 42 | + _ if crate::cpu_features::is_enabled_simd128() => wasm32::compare256 as F, |
| 43 | + _ => rust::compare256 as F, |
| 44 | + }; |
| 45 | + |
| 46 | + PTR.store(ptr as *mut (), Ordering::Relaxed); |
| 47 | + |
| 48 | + // Safety: we've validated the target feature requirements |
| 49 | + unsafe { ptr(src0, src1) } |
25 | 50 | } |
26 | 51 |
|
27 | | - rust::compare256(src0, src1) |
| 52 | + let ptr = PTR.load(Ordering::Relaxed); |
| 53 | + |
| 54 | + // Safety: we trust this function pointer (PTR is local to the function) |
| 55 | + let dynamic_compare256 = unsafe { core::mem::transmute::<*mut (), F>(ptr) }; |
| 56 | + |
| 57 | + // Safety: we've validated the target feature requirements |
| 58 | + unsafe { dynamic_compare256(src0, src1) } |
28 | 59 | } |
29 | 60 |
|
30 | 61 | pub fn compare256_rle_slice(byte: u8, src: &[u8]) -> usize { |
|
0 commit comments