From cdfe3e7d88b3de05cb0ede132d410ed59cff4df2 Mon Sep 17 00:00:00 2001 From: Don MacAskill Date: Fri, 7 Nov 2025 23:28:37 -0800 Subject: [PATCH 1/3] Use Rust 1.89.0 as the baseline version 1.89.0 is when AVX-512 support was stabilized. Moving to 1.89.0 allows us to remove a lot of legacy code gated behind the `rustversion` crate, simplifying things and improving maintainability. --- .github/workflows/tests.yml | 12 +- Cargo.lock | 8 +- Cargo.toml | 8 +- src/arch/mod.rs | 42 +- src/arch/vpclmulqdq.rs | 629 ------------------ src/arch/x86_64/avx512.rs | 23 +- src/arch/x86_64/avx512_vpclmulqdq.rs | 22 - .../fusion/x86/iscsi/avx512_pclmulqdq.rs | 3 +- .../fusion/x86/iscsi/avx512_vpclmulqdq.rs | 4 +- src/crc32/fusion/x86/mod.rs | 39 +- src/feature_detection.rs | 285 +------- 11 files changed, 23 insertions(+), 1052 deletions(-) delete mode 100644 src/arch/vpclmulqdq.rs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 17bcf6a..09e3b96 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,8 +12,7 @@ jobs: matrix: os: [ubuntu-22.04-arm, ubuntu-24.04-arm, macos-14, macos-15, macos-26, macos-latest, windows-11-arm] rust-toolchain: - - "1.81" # minimum for this crate - - "1.89" # when AVX-512 VPCLMULQDQ was stabilized + - "1.89" # minimum for this crate, when AVX-512 VPCLMULQDQ was stabilized - "stable" - "nightly" runs-on: ${{ matrix.os }} @@ -43,8 +42,7 @@ jobs: matrix: os: [ ubuntu-latest, ubuntu-22.04, ubuntu-24.04, macos-15-intel, windows-2022, windows-2025, windows-latest ] rust-toolchain: - - "1.81" # minimum for this crate - - "1.89" # when AVX-512 VPCLMULQDQ was stabilized + - "1.89" # minimum for this crate, when AVX-512 VPCLMULQDQ was stabilized - "stable" - "nightly" runs-on: ${{ matrix.os }} @@ -75,8 +73,7 @@ jobs: matrix: target: [i586-unknown-linux-gnu, i686-unknown-linux-gnu] rust-toolchain: - - "1.81" # minimum for this crate - - "1.89" # when AVX-512 VPCLMULQDQ was stabilized + - "1.89" # minimum for this crate, when AVX-512 VPCLMULQDQ was stabilized - "stable" - "nightly" steps: @@ -101,8 +98,7 @@ jobs: matrix: target: [powerpc-unknown-linux-gnu, powerpc64-unknown-linux-gnu] rust-toolchain: - - "1.81" # minimum for this crate - - "1.89" # when AVX-512 VPCLMULQDQ was stabilized + - "1.89" # minimum for this crate, when AVX-512 VPCLMULQDQ was stabilized - "stable" - "nightly" steps: diff --git a/Cargo.lock b/Cargo.lock index a2ddb79..ec36ad1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -208,10 +208,8 @@ dependencies = [ "crc", "criterion", "digest", - "indexmap", "rand", "regex", - "rustversion", ] [[package]] @@ -373,9 +371,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown", diff --git a/Cargo.toml b/Cargo.toml index cc10da6..a758bc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ readme = "README.md" # 1.69.0 added VPCLMULQDQ x86 detection support # 1.70.0 added LLVM 16 which supports PMULL2 on Aarch64 # 1.89.0 stabilized AVX-512 intrinsics, including VPCLMULQDQ -rust-version = "1.81" +rust-version = "1.89" [lib] name = "crc_fast" @@ -25,12 +25,6 @@ bench = true crc = "3" digest = { version = "0.10", features = ["alloc"] } -# will be removed once Rust 1.89 is the minimum supported version -rustversion = "1.0" - -# constrain indexmap (transitive) to a version compatible with Rust 1.81.0 -indexmap = { version = ">=2.11.0, <2.12.0", optional = true } - [dev-dependencies] criterion = "0.7" cbindgen = "0.29" diff --git a/src/arch/mod.rs b/src/arch/mod.rs index aef965d..0d31dac 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -85,11 +85,10 @@ unsafe fn update_aarch64_aes_sha3( } } -/// Main entry point for x86/x86_64 (Rust 1.89+ which supports AVX-512) +/// Main entry point for x86/x86_64 /// /// # Safety /// May use native CPU features -#[rustversion::since(1.89)] #[inline(always)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 { @@ -123,45 +122,6 @@ pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 } } -/// Main entry point for x86/x86_64 (Rust < 1.89 with no AVX-512 support) -/// -/// # Safety -/// May use native CPU features -#[rustversion::before(1.89)] -#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 { - use crate::feature_detection::{get_arch_ops, ArchOpsInstance}; - - match get_arch_ops() { - ArchOpsInstance::X86SsePclmulqdq(ops) => match params.width { - 64 => algorithm::update::<_, Width64>(state, bytes, params, ops), - 32 => algorithm::update::<_, Width32>(state as u32, bytes, params, ops) as u64, - _ => panic!("Unsupported CRC width: {}", params.width), - }, - ArchOpsInstance::SoftwareFallback => x86_software_update(state, bytes, params), - } -} - -#[inline(always)] -#[allow(unused)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn x86_software_update(state: u64, bytes: &[u8], params: CrcParams) -> u64 { - if !is_x86_feature_detected!("sse4.1") || !is_x86_feature_detected!("pclmulqdq") { - #[cfg(all( - target_arch = "x86", - any(not(target_feature = "sse4.1"), not(target_feature = "pclmulqdq")) - ))] - { - // Use software implementation when no SIMD support is available - crate::arch::software::update(state, bytes, params); - } - } - - // This should never happen, but just in case - panic!("x86 features missing (SSE4.1 && PCLMULQDQ)"); -} - #[inline] #[cfg(all( not(target_arch = "x86"), diff --git a/src/arch/vpclmulqdq.rs b/src/arch/vpclmulqdq.rs deleted file mode 100644 index 
632fc65..0000000 --- a/src/arch/vpclmulqdq.rs +++ /dev/null @@ -1,629 +0,0 @@ -// Copyright 2025 Don MacAskill. Licensed under MIT or Apache-2.0. - -//! This module provides AVX-512 and VPCLMULQDQ-specific implementations of the ArchOps trait. -//! -//! It performs folding using 4 x ZMM registers of 512-bits each. - -#![cfg(target_arch = "x86_64")] - -#[rustversion::since(1.89)] -use crate::arch::x86::X86Ops; - -#[rustversion::since(1.89)] -use crate::enums::Reflector; - -#[rustversion::since(1.89)] -use crate::structs::CrcState; - -#[rustversion::since(1.89)] -use crate::traits::{ArchOps, EnhancedCrcWidth}; - -#[rustversion::since(1.89)] -use std::arch::x86_64::*; - -#[rustversion::since(1.89)] -use std::ops::BitXor; - -/// Implements the ArchOps trait using 512-bit AVX-512 and VPCLMULQDQ instructions at 512 bits. -/// Delegates to X86Ops for standard 128-bit operations -#[rustversion::since(1.89)] -#[derive(Debug, Copy, Clone)] -pub struct Vpclmulqdq512Ops(X86Ops); - -#[rustversion::since(1.89)] -impl Vpclmulqdq512Ops { - #[inline(always)] - pub fn new() -> Self { - Self(X86Ops) - } -} - -// Wrapper for __m512i to make it easier to work with -#[rustversion::since(1.89)] -#[derive(Debug, Copy, Clone)] -struct Simd512(__m512i); - -#[rustversion::since(1.89)] -impl Simd512 { - #[inline] - #[target_feature(enable = "avx512f")] - #[allow(clippy::too_many_arguments)] - unsafe fn new(x7: u64, x6: u64, x5: u64, x4: u64, x3: u64, x2: u64, x1: u64, x0: u64) -> Self { - Self(_mm512_set_epi64( - x7 as i64, x6 as i64, x5 as i64, x4 as i64, x3 as i64, x2 as i64, x1 as i64, x0 as i64, - )) - } - - #[inline] - #[target_feature(enable = "avx512f,avx512vl,vpclmulqdq")] - unsafe fn fold_64(&self, coeff: &Self, new_data: &Self) -> Self { - // Use 512-bit ternary logic XOR3 with carryless multiplication - Self(_mm512_ternarylogic_epi64( - _mm512_clmulepi64_epi128(self.0, coeff.0, 0), // Low parts - _mm512_clmulepi64_epi128(self.0, coeff.0, 17), // High parts - new_data.0, - 0x96, // XOR3 operation - )) - } - - #[inline] - #[target_feature(enable = "avx512f")] - unsafe fn extract_u64s(&self) -> [u64; 8] { - let mut result = [0u64; 8]; - _mm512_storeu_si512(result.as_mut_ptr().cast(), self.0); - - result - } - - #[inline] - #[target_feature(enable = "avx512f")] - unsafe fn load_from_ptr(ptr: *const u8) -> Self { - Self(_mm512_loadu_si512(ptr as *const __m512i)) - } - - #[inline] - #[target_feature(enable = "avx512f")] - unsafe fn to_128i_extract(self) -> __m128i { - _mm512_extracti32x4_epi32(self.0, INDEX) - } - - #[inline] - #[target_feature(enable = "avx512f")] - unsafe fn xor(&self, other: &Self) -> Self { - Self(_mm512_xor_si512(self.0, other.0)) - } - - #[inline] - #[target_feature(enable = "avx512f")] - #[allow(unused)] - unsafe fn print_hex(&self, prefix: &str) { - let values = self.extract_u64s(); - println!( - "{}={:#016x}_{:016x}_{:016x}_{:016x}_{:016x}_{:016x}_{:016x}_{:016x}", - prefix, - values[7], - values[6], - values[5], - values[4], - values[3], - values[2], - values[1], - values[0] - ); - } -} - -#[rustversion::since(1.89)] -impl Vpclmulqdq512Ops { - /// Process aligned blocks using VPCLMULQDQ with 4 x 512-bit registers - /// - /// Note that #[inline(always)] loses the inlining performance boost, despite no native - /// target_features being used directly. Odd since that's not how Rust's docs make it sound... 
- #[inline] - #[target_feature(enable = "ssse3,avx2,avx512f,avx512vl,avx512bw,vpclmulqdq,pclmulqdq")] - unsafe fn process_blocks( - &self, - state: &mut CrcState<::Vector>, - first: &[__m128i; 8], - rest: &[[__m128i; 8]], - keys: [u64; 23], - reflected: bool, - ) -> W::Value - where - W::Value: Copy + BitXor, - { - let state_u64s = self.extract_u64s(state.value); - - let positioned_state = if reflected { - Simd512::new(0, 0, 0, 0, 0, 0, 0, state_u64s[0]) - } else { - Simd512::new(state_u64s[1], 0, 0, 0, 0, 0, 0, 0) - }; - - let reflector = create_reflector512(reflected); - - // Load first 256 bytes (2nd half is rest[0] since these are 128-byte blocks) - let first_ptr = first.as_ptr() as *const u8; - let first_rest_ptr = rest[0].as_ptr() as *const u8; - - let mut x = [ - reflect_bytes512(&reflector, Simd512::load_from_ptr(first_ptr)), - reflect_bytes512(&reflector, Simd512::load_from_ptr(first_ptr.add(64))), - reflect_bytes512(&reflector, Simd512::load_from_ptr(first_rest_ptr)), - reflect_bytes512(&reflector, Simd512::load_from_ptr(first_rest_ptr.add(64))), - ]; - - x[0] = positioned_state.xor(&x[0]); - - let coeff = self.create_avx512_256byte_coefficient(keys, reflected); - - let remaining_rest = &rest[1..]; - let pair_count = remaining_rest.len() / 2; - - for i in 0..pair_count { - let block1_ptr = remaining_rest[i * 2].as_ptr() as *const u8; - let block2_ptr = remaining_rest[i * 2 + 1].as_ptr() as *const u8; - - x[0] = x[0].fold_64( - &coeff, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(block1_ptr)), - ); - x[1] = x[1].fold_64( - &coeff, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(block1_ptr.add(64))), - ); - x[2] = x[2].fold_64( - &coeff, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(block2_ptr)), - ); - x[3] = x[3].fold_64( - &coeff, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(block2_ptr.add(64))), - ); - } - - let processed_pairs = pair_count * 2; - let remaining_single_count = remaining_rest.len() - processed_pairs; - - if remaining_single_count > 0 { - // We have 1 unprocessed block (128 bytes) - // Fold 4×512 down to 2×512 and process the remaining block with 2-register mode - let folded_2reg = self.fold_from_4x512_to_2x256(x, keys, reflected); - let coeff_2reg = self.create_avx512_128byte_coefficient(keys, reflected); - - let last_block_ptr = remaining_rest[processed_pairs].as_ptr() as *const u8; - - let final_x = [ - folded_2reg[0].fold_64( - &coeff_2reg, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(last_block_ptr)), - ), - folded_2reg[1].fold_64( - &coeff_2reg, - &reflect_bytes512(&reflector, Simd512::load_from_ptr(last_block_ptr.add(64))), - ), - ]; - - let folded = self.fold_from_2x512_to_1x128(final_x, keys, reflected); - - return W::perform_final_reduction(folded, reflected, keys, self); - } - - // All blocks processed in pairs - fold from 4 x 512-bit to 1 x 128-bit - let folded = self.fold_from_4x512_to_1x128(x, keys, reflected); - - W::perform_final_reduction(folded, reflected, keys, self) - } - - /// Create a folding coefficient for AVX-512 for 128-byte folding distances - #[inline(always)] - unsafe fn create_avx512_128byte_coefficient( - &self, - keys: [u64; 23], - reflected: bool, - ) -> Simd512 { - let (k1, k2) = if reflected { - (keys[3], keys[4]) - } else { - (keys[4], keys[3]) - }; - - // Replicate the coefficient pair - Simd512::new(k1, k2, k1, k2, k1, k2, k1, k2) - } - - /// Create a folding coefficient for AVX-512 for 256-byte folding distances - #[inline(always)] - unsafe fn create_avx512_256byte_coefficient( - 
&self, - keys: [u64; 23], - reflected: bool, - ) -> Simd512 { - let (k1, k2) = if reflected { - (keys[21], keys[22]) - } else { - (keys[22], keys[21]) - }; - - // Replicate the coefficient pair - Simd512::new(k1, k2, k1, k2, k1, k2, k1, k2) - } - - /// Fold from 4 x 512-bit to 1 x 128-bit - #[inline(always)] - unsafe fn fold_from_4x512_to_1x128( - &self, - x: [Simd512; 4], - keys: [u64; 23], - reflected: bool, - ) -> __m128i { - // Step 1: Fold 4 x 512-bit to 2 x 512-bit - let x2 = self.fold_from_4x512_to_2x256(x, keys, reflected); - - // Step 2: Fold 2 x 512-bit to 1 x 128-bit - self.fold_from_2x512_to_1x128(x2, keys, reflected) - } - - /// Fold from 4 x 512-bit to 2 x 512-bit - #[inline(always)] - unsafe fn fold_from_4x512_to_2x256( - &self, - x: [Simd512; 4], - keys: [u64; 23], - reflected: bool, - ) -> [Simd512; 2] { - // This folds registers that are 128 bytes apart (x[0] with x[2], x[1] with x[3]) - let coeff = self.create_avx512_128byte_coefficient(keys, reflected); - - // Fold pairs: - // x[0] (bytes 0-63) + x[2] (bytes 128-191) → result[0] - // x[1] (bytes 64-127) + x[3] (bytes 192-255) → result[1] - [x[0].fold_64(&coeff, &x[2]), x[1].fold_64(&coeff, &x[3])] - } - - /// Fold from 2 x 512-bit to 1 x 128-bit - #[inline(always)] - unsafe fn fold_from_2x512_to_1x128( - &self, - x: [Simd512; 2], - keys: [u64; 23], - reflected: bool, - ) -> __m128i { - // Create the fold coefficients for different distances - let fold_coefficients = [ - self.create_vector_from_u64_pair(keys[10], keys[9], reflected), // 112 bytes - self.create_vector_from_u64_pair(keys[12], keys[11], reflected), // 96 bytes - self.create_vector_from_u64_pair(keys[14], keys[13], reflected), // 80 bytes - self.create_vector_from_u64_pair(keys[16], keys[15], reflected), // 64 bytes - self.create_vector_from_u64_pair(keys[18], keys[17], reflected), // 48 bytes - self.create_vector_from_u64_pair(keys[20], keys[19], reflected), // 32 bytes - self.create_vector_from_u64_pair(keys[2], keys[1], reflected), // 16 bytes - ]; - - // Extract the 8 x 128-bit vectors from the 2 x 512-bit vectors (this is faster than - // using 256-bit intrinsics for 1KiB payloads) - let v128 = if reflected { - [ - x[0].to_128i_extract::<0>(), // 256-x0.low - x[0].to_128i_extract::<1>(), // 256-x0.high - x[0].to_128i_extract::<2>(), // 256-x1.low - x[0].to_128i_extract::<3>(), // 256-x1.high - x[1].to_128i_extract::<0>(), // 256-x2.low - x[1].to_128i_extract::<1>(), // 256-x2.high - x[1].to_128i_extract::<2>(), // 256-x3.low - x[1].to_128i_extract::<3>(), // 256-x3.high - ] - } else { - [ - x[0].to_128i_extract::<3>(), // 256-x1.high - x[0].to_128i_extract::<2>(), // 256-x1.low - x[0].to_128i_extract::<1>(), // 256-x0.high - x[0].to_128i_extract::<0>(), // 256-x0.low - x[1].to_128i_extract::<3>(), // 256-x3.high - x[1].to_128i_extract::<2>(), // 256-x3.low - x[1].to_128i_extract::<1>(), // 256-x2.high - x[1].to_128i_extract::<0>(), // 256-x2.low - ] - }; - - // Fold the 8 xmm registers to 1 xmm register - let mut res = v128[7]; - - for (i, &coeff) in fold_coefficients.iter().enumerate() { - let folded_h = self.carryless_mul_00(v128[i], coeff); - let folded_l = self.carryless_mul_11(v128[i], coeff); - res = self.xor3_vectors(folded_h, folded_l, res); - } - - res - } -} - -// 512-bit version of the Reflector -#[rustversion::since(1.89)] -#[derive(Clone, Copy)] -enum Reflector512 { - NoReflector, - ForwardReflector { smask: Simd512 }, -} - -// Function to create the appropriate reflector based on CRC parameters -#[rustversion::since(1.89)] 
-#[inline(always)] -unsafe fn create_reflector512(reflected: bool) -> Reflector512 { - if reflected { - Reflector512::NoReflector - } else { - // Load shuffle mask - let smask = Simd512::new( - 0x08090a0b0c0d0e0f, - 0x0001020304050607, - 0x08090a0b0c0d0e0f, - 0x0001020304050607, - 0x08090a0b0c0d0e0f, - 0x0001020304050607, - 0x08090a0b0c0d0e0f, - 0x0001020304050607, - ); - Reflector512::ForwardReflector { smask } - } -} - -// Function to apply reflection to a 512-bit vector -#[rustversion::since(1.89)] -#[inline(always)] -unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 { - match reflector { - Reflector512::NoReflector => data, - Reflector512::ForwardReflector { smask } => shuffle_bytes512(data, *smask), - } -} - -// pre-compute the reverse indices for 512-bit shuffling -#[rustversion::since(1.89)] -static REVERSE_INDICES_512: __m512i = - unsafe { std::mem::transmute([7u64, 6u64, 5u64, 4u64, 3u64, 2u64, 1u64, 0u64]) }; - -// Implement a 512-bit byte shuffle function -#[rustversion::since(1.89)] -#[inline] -#[target_feature(enable = "avx512f,avx512bw")] -unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 { - Simd512(_mm512_permutexvar_epi64( - // Reverse the order using 512-bit permutation - REVERSE_INDICES_512, // reverse indices - _mm512_shuffle_epi8(data.0, mask.0), // shuffled data - )) -} - -// Delegate all ArchOps methods to the inner X86Ops instance -#[rustversion::since(1.89)] -impl ArchOps for Vpclmulqdq512Ops { - type Vector = __m128i; - - #[inline(always)] - unsafe fn process_enhanced_simd_blocks( - &self, - state: &mut CrcState, - first: &[Self::Vector; 8], - rest: &[[Self::Vector; 8]], - _reflector: &Reflector, - keys: [u64; 23], - ) -> bool - where - Self::Vector: Copy, - { - // Update the state with the result - *state = W::create_state( - self.process_blocks::(state, first, rest, keys, state.reflected), - state.reflected, - self, - ); - - // Return true to indicate we handled it - true - } - - // Delegate all other methods to X86Ops - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn create_vector_from_u64_pair( - &self, - high: u64, - low: u64, - reflected: bool, - ) -> Self::Vector { - self.0.create_vector_from_u64_pair(high, low, reflected) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn create_vector_from_u64_pair_non_reflected( - &self, - high: u64, - low: u64, - ) -> Self::Vector { - self.0.create_vector_from_u64_pair_non_reflected(high, low) - } - - #[inline] - #[target_feature(enable = "sse4.1")] - unsafe fn create_vector_from_u64(&self, value: u64, high: bool) -> Self::Vector { - self.0.create_vector_from_u64(value, high) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn extract_u64s(&self, vector: Self::Vector) -> [u64; 2] { - self.0.extract_u64s(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn extract_poly64s(&self, vector: Self::Vector) -> [u64; 2] { - self.0.extract_poly64s(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn xor_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.xor_vectors(a, b) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn load_bytes(&self, ptr: *const u8) -> Self::Vector { - self.0.load_bytes(ptr) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn load_aligned(&self, ptr: *const [u64; 2]) -> Self::Vector { - self.0.load_aligned(ptr) - } - - #[inline] - #[target_feature(enable = "ssse3")] - unsafe fn shuffle_bytes(&self, data: Self::Vector, 
mask: Self::Vector) -> Self::Vector { - self.0.shuffle_bytes(data, mask) - } - - #[inline] - #[target_feature(enable = "sse4.1")] - unsafe fn blend_vectors( - &self, - a: Self::Vector, - b: Self::Vector, - mask: Self::Vector, - ) -> Self::Vector { - self.0.blend_vectors(a, b, mask) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_left_8(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_left_8(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn set_all_bytes(&self, value: u8) -> Self::Vector { - self.0.set_all_bytes(value) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn create_compare_mask(&self, vector: Self::Vector) -> Self::Vector { - self.0.create_compare_mask(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn and_vectors(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.and_vectors(a, b) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_32(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_32(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_left_32(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_left_32(vector) - } - - #[inline] - #[target_feature(enable = "sse4.1")] - unsafe fn create_vector_from_u32(&self, value: u32, high: bool) -> Self::Vector { - self.0.create_vector_from_u32(value, high) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_left_4(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_left_4(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_4(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_4(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_8(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_8(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_5(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_5(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_6(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_6(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_7(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_7(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_right_12(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_right_12(vector) - } - - #[inline] - #[target_feature(enable = "sse2")] - unsafe fn shift_left_12(&self, vector: Self::Vector) -> Self::Vector { - self.0.shift_left_12(vector) - } - - #[inline] - #[target_feature(enable = "pclmulqdq")] - unsafe fn carryless_mul_00(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.carryless_mul_00(a, b) - } - - #[inline] - #[target_feature(enable = "pclmulqdq")] - unsafe fn carryless_mul_01(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.carryless_mul_01(a, b) - } - - #[inline] - #[target_feature(enable = "pclmulqdq")] - unsafe fn carryless_mul_10(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.carryless_mul_10(a, b) - } - - #[inline] - #[target_feature(enable = "pclmulqdq")] - unsafe fn carryless_mul_11(&self, a: Self::Vector, b: Self::Vector) -> Self::Vector { - self.0.carryless_mul_11(a, b) - } - - #[inline] - #[target_feature(enable = "avx512f,avx512vl")] - unsafe fn xor3_vectors( - &self, - a: Self::Vector, - b: 
Self::Vector, - c: Self::Vector, - ) -> Self::Vector { - // Use AVX-512 ternary logic when available - _mm_ternarylogic_epi64( - a, b, c, 0x96, // XOR3 - ) - } -} diff --git a/src/arch/x86_64/avx512.rs b/src/arch/x86_64/avx512.rs index d26ec2f..f68b3cc 100644 --- a/src/arch/x86_64/avx512.rs +++ b/src/arch/x86_64/avx512.rs @@ -6,21 +6,15 @@ #![cfg(target_arch = "x86_64")] -#[rustversion::since(1.89)] -use std::arch::x86_64::*; - -#[rustversion::since(1.89)] use crate::arch::x86::sse::X86SsePclmulqdqOps; -#[rustversion::since(1.89)] use crate::traits::ArchOps; +use std::arch::x86_64::*; /// x86_64-only AVX512+PCLMULQDQ tier - delegates to SSE tier and overrides XOR3 operations /// Uses AVX512 ternary logic for XOR3 operations with PCLMULQDQ -#[rustversion::since(1.89)] #[derive(Debug, Copy, Clone)] pub struct X86_64Avx512PclmulqdqOps(X86SsePclmulqdqOps); -#[rustversion::since(1.89)] impl X86_64Avx512PclmulqdqOps { #[inline(always)] pub fn new() -> Self { @@ -28,7 +22,6 @@ impl X86_64Avx512PclmulqdqOps { } } -#[rustversion::since(1.89)] impl ArchOps for X86_64Avx512PclmulqdqOps { type Vector = __m128i; @@ -192,7 +185,6 @@ impl ArchOps for X86_64Avx512PclmulqdqOps { self.0.carryless_mul_11(a, b) } - #[rustversion::since(1.89)] #[inline] #[target_feature(enable = "avx512vl")] unsafe fn xor3_vectors( @@ -204,17 +196,4 @@ impl ArchOps for X86_64Avx512PclmulqdqOps { // AVX512 tier always uses ternary logic _mm_ternarylogic_epi64(a, b, c, 0x96) // XOR3 operation } - - // Fallback for older Rust versions - #[rustversion::before(1.89)] - #[inline(always)] - unsafe fn xor3_vectors( - &self, - a: Self::Vector, - b: Self::Vector, - c: Self::Vector, - ) -> Self::Vector { - // Rust < 1.89 doesn't have _mm_ternarylogic_epi64, fall back to SSE - self.0.xor3_vectors(a, b, c) - } } diff --git a/src/arch/x86_64/avx512_vpclmulqdq.rs b/src/arch/x86_64/avx512_vpclmulqdq.rs index 881ca7e..9f9fe9e 100644 --- a/src/arch/x86_64/avx512_vpclmulqdq.rs +++ b/src/arch/x86_64/avx512_vpclmulqdq.rs @@ -6,31 +6,18 @@ #![cfg(target_arch = "x86_64")] -#[rustversion::since(1.89)] use crate::arch::x86::sse::X86SsePclmulqdqOps; - -#[rustversion::since(1.89)] use crate::enums::Reflector; - -#[rustversion::since(1.89)] use crate::structs::CrcState; - -#[rustversion::since(1.89)] use crate::traits::{ArchOps, EnhancedCrcWidth}; - -#[rustversion::since(1.89)] use std::arch::x86_64::*; - -#[rustversion::since(1.89)] use std::ops::BitXor; /// Implements the ArchOps trait using 512-bit AVX-512 and VPCLMULQDQ instructions at 512 bits. 
/// Delegates to X86SsePclmulqdqOps for standard 128-bit operations -#[rustversion::since(1.89)] #[derive(Debug, Copy, Clone)] pub struct X86_64Avx512VpclmulqdqOps(X86SsePclmulqdqOps); -#[rustversion::since(1.89)] impl X86_64Avx512VpclmulqdqOps { #[inline(always)] pub fn new() -> Self { @@ -39,11 +26,9 @@ impl X86_64Avx512VpclmulqdqOps { } // Wrapper for __m512i to make it easier to work with -#[rustversion::since(1.89)] #[derive(Debug, Copy, Clone)] struct Simd512(__m512i); -#[rustversion::since(1.89)] impl Simd512 { #[inline] #[target_feature(enable = "avx512f")] @@ -113,7 +98,6 @@ impl Simd512 { } } -#[rustversion::since(1.89)] impl X86_64Avx512VpclmulqdqOps { /// Process aligned blocks using VPCLMULQDQ with 4 x 512-bit registers /// @@ -340,7 +324,6 @@ impl X86_64Avx512VpclmulqdqOps { } // 512-bit version of the Reflector -#[rustversion::since(1.89)] #[derive(Clone, Copy)] enum Reflector512 { NoReflector, @@ -348,7 +331,6 @@ enum Reflector512 { } // Function to create the appropriate reflector based on CRC parameters -#[rustversion::since(1.89)] #[inline(always)] unsafe fn create_reflector512(reflected: bool) -> Reflector512 { if reflected { @@ -370,7 +352,6 @@ unsafe fn create_reflector512(reflected: bool) -> Reflector512 { } // Function to apply reflection to a 512-bit vector -#[rustversion::since(1.89)] #[inline(always)] unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 { match reflector { @@ -380,12 +361,10 @@ unsafe fn reflect_bytes512(reflector: &Reflector512, data: Simd512) -> Simd512 { } // pre-compute the reverse indices for 512-bit shuffling -#[rustversion::since(1.89)] static REVERSE_INDICES_512: __m512i = unsafe { std::mem::transmute([7u64, 6u64, 5u64, 4u64, 3u64, 2u64, 1u64, 0u64]) }; // Implement a 512-bit byte shuffle function -#[rustversion::since(1.89)] #[inline] #[target_feature(enable = "avx512f,avx512bw")] unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 { @@ -397,7 +376,6 @@ unsafe fn shuffle_bytes512(data: Simd512, mask: Simd512) -> Simd512 { } // Delegate all ArchOps methods to the inner X86SsePclmulqdqOps instance -#[rustversion::since(1.89)] impl ArchOps for X86_64Avx512VpclmulqdqOps { type Vector = __m128i; diff --git a/src/crc32/fusion/x86/iscsi/avx512_pclmulqdq.rs b/src/crc32/fusion/x86/iscsi/avx512_pclmulqdq.rs index b3bc0ba..239e183 100644 --- a/src/crc32/fusion/x86/iscsi/avx512_pclmulqdq.rs +++ b/src/crc32/fusion/x86/iscsi/avx512_pclmulqdq.rs @@ -12,8 +12,7 @@ /// /// Modified as necessary for this Rust implementation. /// -/// Uses AVX-512 instructions so only available after Rust 1.89 (when AVX-512 stabilized) -#[rustversion::since(1.89)] +/// Uses AVX-512 instructions #[inline] #[target_feature(enable = "avx512vl,pclmulqdq")] pub unsafe fn crc32_iscsi_avx512_v4s3x3(mut crc0: u32, mut buf: *const u8, mut len: usize) -> u32 { diff --git a/src/crc32/fusion/x86/iscsi/avx512_vpclmulqdq.rs b/src/crc32/fusion/x86/iscsi/avx512_vpclmulqdq.rs index 56a7cb0..e4e1e76 100644 --- a/src/crc32/fusion/x86/iscsi/avx512_vpclmulqdq.rs +++ b/src/crc32/fusion/x86/iscsi/avx512_vpclmulqdq.rs @@ -12,9 +12,7 @@ /// /// Modified as necessary for this Rust implementation. 
/// -/// Uses AVX-512 VPCLMULQDQ instructions, so only available after Rust 1.89 (when AVX-512 -/// stabilized) +/// Uses AVX-512 VPCLMULQDQ instructions #[inline] #[target_feature(enable = "avx512vl,vpclmulqdq")] pub unsafe fn crc32_iscsi_avx512_vpclmulqdq_v3x2( diff --git a/src/crc32/fusion/x86/mod.rs b/src/crc32/fusion/x86/mod.rs index a8043b3..6c4e1da 100644 --- a/src/crc32/fusion/x86/mod.rs +++ b/src/crc32/fusion/x86/mod.rs @@ -18,36 +18,19 @@ mod iscsi; use iscsi::sse_pclmulqdq::crc32_iscsi_sse_v4s3x3; #[cfg(target_arch = "x86_64")] -#[rustversion::since(1.89)] use iscsi::avx512_pclmulqdq::crc32_iscsi_avx512_v4s3x3; #[cfg(target_arch = "x86_64")] -#[rustversion::since(1.89)] use iscsi::avx512_vpclmulqdq::crc32_iscsi_avx512_vpclmulqdq_v3x2; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -/// CRC32 iSCSI calculation for Rust versions before 1.89 (pre-AVX-512 support) -/// -/// -/// This function is called by the wrapper layer after feature detection has been performed. -/// For older Rust versions, only SSE implementation is available. -#[rustversion::before(1.89)] -#[inline(always)] -pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 { - // Only SSE implementation is available for Rust versions before 1.89 - // Runtime feature detection is handled by the wrapper layer - unsafe { crc32_iscsi_sse_v4s3x3(crc, data.as_ptr(), data.len()) } -} - -/// CRC32 iSCSI calculation using the highest available instruction set after Rust 1.89 -/// (post-AVX-512 support) +/// CRC32 iSCSI calculation using the highest available instruction set /// /// This function is called by the wrapper layer after feature detection has been performed. /// The wrapper layer ensures that only the appropriate implementation is called based on /// cached feature detection results, removing runtime checks from the hot path.
-#[rustversion::since(1.89)] #[inline(always)] pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 { #[cfg(target_arch = "x86_64")] @@ -72,7 +55,6 @@ pub fn crc32_iscsi(crc: u32, data: &[u8]) -> u32 { unsafe { crc32_iscsi_sse_v4s3x3(crc, data.as_ptr(), data.len()) } } -#[rustversion::since(1.89)] #[cfg(target_arch = "x86_64")] #[inline] #[target_feature(enable = "avx512vl,vpclmulqdq")] @@ -80,7 +62,6 @@ unsafe fn clmul_lo_avx512_vpclmulqdq(a: __m512i, b: __m512i) -> __m512i { _mm512_clmulepi64_epi128(a, b, 0) } -#[rustversion::since(1.89)] #[cfg(target_arch = "x86_64")] #[inline] #[target_feature(enable = "avx512vl,vpclmulqdq")] @@ -228,7 +209,6 @@ mod tests { } } - #[rustversion::since(1.89)] fn test_crc32_iscsi_random(len: usize) { let mut data = vec![0u8; len]; rng().fill(&mut data[..]); @@ -269,21 +249,4 @@ mod tests { ); } } - - #[rustversion::before(1.89)] - fn test_crc32_iscsi_random(len: usize) { - let mut data = vec![0u8; len]; - rng().fill(&mut data[..]); - - let checksum = RUST_CRC32_ISCSI.checksum(&data); - - assert_eq!(crc32_iscsi(0xffffffff, &data) ^ 0xffffffff, checksum); - - unsafe { - assert_eq!( - crc32_iscsi_sse_v4s3x3(0xffffffff, data.as_ptr(), data.len()) ^ 0xffffffff, - checksum - ); - } - } } diff --git a/src/feature_detection.rs b/src/feature_detection.rs index b170a74..c882c84 100644 --- a/src/feature_detection.rs +++ b/src/feature_detection.rs @@ -41,9 +41,6 @@ pub struct ArchCapabilities { pub has_pclmulqdq: bool, pub has_avx512vl: bool, // implicitly enables avx512f, has XOR3 operations pub has_vpclmulqdq: bool, - - // Rust version gates - pub rust_version_supports_avx512: bool, } /// Helper function to convert a performance tier to a human-readable target string @@ -86,7 +83,6 @@ unsafe fn detect_arch_capabilities() -> ArchCapabilities { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, } } } @@ -114,7 +110,6 @@ unsafe fn detect_aarch64_features() -> ArchCapabilities { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, } } @@ -124,18 +119,12 @@ unsafe fn detect_aarch64_features() -> ArchCapabilities { unsafe fn detect_x86_features() -> ArchCapabilities { use std::arch::is_x86_feature_detected; - // Check Rust version support for VPCLMULQDQ (requires 1.89+) - let rust_version_supports_avx512 = check_rust_version_supports_avx512(); - // SSE 4.1 and PCLMULQDQ support are the baseline for hardware acceleration let has_sse41 = is_x86_feature_detected!("sse4.1"); let has_pclmulqdq = has_sse41 && is_x86_feature_detected!("pclmulqdq"); - // After Rust 1.89, AVX-512VL and VPCLMULQDQ can be used if available - let has_avx512vl = - has_pclmulqdq && rust_version_supports_avx512 && is_x86_feature_detected!("avx512vl"); - let has_vpclmulqdq = - has_avx512vl && rust_version_supports_avx512 && is_x86_feature_detected!("vpclmulqdq"); + let has_avx512vl = has_pclmulqdq && is_x86_feature_detected!("avx512vl"); + let has_vpclmulqdq = has_avx512vl && is_x86_feature_detected!("vpclmulqdq"); ArchCapabilities { has_aes: false, @@ -144,28 +133,9 @@ unsafe fn detect_x86_features() -> ArchCapabilities { has_pclmulqdq, has_avx512vl, has_vpclmulqdq, - rust_version_supports_avx512, } } -/// Check if the current Rust version supports VPCLMULQDQ intrinsics -/// VPCLMULQDQ intrinsics were stabilized in Rust 1.89 -#[rustversion::since(1.89)] -#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub(crate) fn check_rust_version_supports_avx512() -> bool { - true -} - 
-/// Check if the current Rust version supports VPCLMULQDQ intrinsics -/// VPCLMULQDQ intrinsics were stabilized in Rust 1.89 -#[rustversion::before(1.89)] -#[inline(always)] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -pub(crate) fn check_rust_version_supports_avx512() -> bool { - false -} - /// Select the appropriate performance tier based on detected capabilities #[inline(always)] #[allow(unused)] @@ -207,7 +177,6 @@ pub(crate) fn select_performance_tier(capabilities: &ArchCapabilities) -> Perfor /// Enum that holds the different ArchOps implementations for compile-time dispatch /// This avoids the need for trait objects while still providing factory-based selection -#[rustversion::since(1.89)] #[derive(Debug, Clone, Copy)] pub enum ArchOpsInstance { #[cfg(target_arch = "aarch64")] @@ -224,22 +193,8 @@ pub enum ArchOpsInstance { SoftwareFallback, } -#[rustversion::before(1.89)] -#[derive(Debug, Clone, Copy)] -pub enum ArchOpsInstance { - #[cfg(target_arch = "aarch64")] - Aarch64Aes(crate::arch::aarch64::aes::Aarch64AesOps), - #[cfg(target_arch = "aarch64")] - Aarch64AesSha3(crate::arch::aarch64::aes_sha3::Aarch64AesSha3Ops), - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - X86SsePclmulqdq(crate::arch::x86::sse::X86SsePclmulqdqOps), - /// Software fallback - no ArchOps struct needed - SoftwareFallback, -} - impl ArchOpsInstance { #[inline(always)] - #[rustversion::since(1.89)] pub fn get_tier(&self) -> PerformanceTier { match self { #[cfg(target_arch = "aarch64")] @@ -256,20 +211,6 @@ impl ArchOpsInstance { } } - #[inline(always)] - #[rustversion::before(1.89)] - pub fn get_tier(&self) -> PerformanceTier { - match self { - #[cfg(target_arch = "aarch64")] - ArchOpsInstance::Aarch64Aes(_) => PerformanceTier::AArch64Aes, - #[cfg(target_arch = "aarch64")] - ArchOpsInstance::Aarch64AesSha3(_) => PerformanceTier::AArch64AesSha3, - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - ArchOpsInstance::X86SsePclmulqdq(_) => PerformanceTier::X86SsePclmulqdq, - ArchOpsInstance::SoftwareFallback => PerformanceTier::SoftwareTable, - } - } - /// Get a human-readable target string describing the active configuration #[inline(always)] pub fn get_target_string(&self) -> String { @@ -298,9 +239,7 @@ fn create_arch_ops() -> ArchOpsInstance { create_arch_ops_from_tier(tier) } -/// Helper function to create ArchOpsInstance from a performance tier for Rust 1.89+ (when AVX512 -/// stabilized) -#[rustversion::since(1.89)] +/// Helper function to create ArchOpsInstance from a performance tier fn create_arch_ops_from_tier(tier: PerformanceTier) -> ArchOpsInstance { match tier { #[cfg(target_arch = "aarch64")] @@ -340,48 +279,6 @@ fn create_arch_ops_from_tier(tier: PerformanceTier) -> ArchOpsInstance { } } -/// Helper function to create ArchOpsInstance from a performance tier for Rust <1.89 (before AVX512 -/// stabilized) -#[rustversion::before(1.89)] -fn create_arch_ops_from_tier(tier: PerformanceTier) -> ArchOpsInstance { - match tier { - #[cfg(target_arch = "aarch64")] - PerformanceTier::AArch64AesSha3 => { - use crate::arch::aarch64::aes_sha3::Aarch64AesSha3Ops; - ArchOpsInstance::Aarch64AesSha3(Aarch64AesSha3Ops::new()) - } - #[cfg(target_arch = "aarch64")] - PerformanceTier::AArch64Aes => { - use crate::arch::aarch64::aes::Aarch64AesOps; - ArchOpsInstance::Aarch64Aes(Aarch64AesOps) - } - #[cfg(target_arch = "x86_64")] - PerformanceTier::X86_64Avx512Vpclmulqdq => { - // VPCLMULQDQ and AVX512 not available in older Rust versions, fall back to SSE - create_x86_sse_pclmulqdq_ops() 
- } - #[cfg(target_arch = "x86_64")] - PerformanceTier::X86_64Avx512Pclmulqdq => { - // AVX512 not available in older Rust versions, fall back to SSE - create_x86_sse_pclmulqdq_ops() - } - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - PerformanceTier::X86_64SsePclmulqdq | PerformanceTier::X86SsePclmulqdq => { - create_x86_sse_pclmulqdq_ops() - } - PerformanceTier::SoftwareTable => { - // Use software fallback - ArchOpsInstance::SoftwareFallback - } - // Handle cases where the performance tier doesn't match the current architecture - _ => { - // This can happen when a tier is selected for a different architecture - // Fall back to software implementation - ArchOpsInstance::SoftwareFallback - } - } -} - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn create_x86_sse_pclmulqdq_ops() -> ArchOpsInstance { use crate::arch::x86::sse::X86SsePclmulqdqOps; @@ -400,18 +297,12 @@ pub fn select_performance_tier_for_test(capabilities: &ArchCapabilities) -> Perf } // x86_64 tier selection - VPCLMULQDQ requires AVX512VL - if capabilities.has_vpclmulqdq - && capabilities.has_avx512vl - && capabilities.rust_version_supports_avx512 - { + if capabilities.has_vpclmulqdq && capabilities.has_avx512vl { return PerformanceTier::X86_64Avx512Vpclmulqdq; } // AVX512VL requires PCLMULQDQ and SSE4.1 - if capabilities.has_avx512vl - && capabilities.has_pclmulqdq - && capabilities.rust_version_supports_avx512 - { + if capabilities.has_avx512vl && capabilities.has_pclmulqdq { return PerformanceTier::X86_64Avx512Pclmulqdq; } @@ -428,15 +319,6 @@ pub fn select_performance_tier_for_test(capabilities: &ArchCapabilities) -> Perf mod tests { use super::*; - #[test] - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn test_rust_version_check() { - let supports_vpclmulqdq = check_rust_version_supports_avx512(); - - // Should return a boolean without panicking - let _ = supports_vpclmulqdq; - } - #[test] fn test_aarch64_tier_selection() { // Test that aarch64 tier selection follows the expected hierarchy @@ -449,7 +331,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_sha3), @@ -464,7 +345,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_aes), @@ -479,7 +359,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_no_aes), @@ -501,7 +380,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // AES support means we have PMULL instructions available for CRC calculations @@ -515,7 +393,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert!(capabilities_with_sha3.has_aes); @@ -527,7 +404,7 @@ mod tests { fn test_x86_64_tier_selection() { // Test that x86_64 tier selection follows the expected hierarchy - // Test VPCLMULQDQ + AVX512 (highest tier) on Rust 1.89+ + // Test VPCLMULQDQ + AVX512 (highest tier) let capabilities_vpclmulqdq = ArchCapabilities { has_aes: false, has_sha3: false, @@ -535,14 +412,13 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: true, - rust_version_supports_avx512: true, }; assert_eq!( 
select_performance_tier_for_test(&capabilities_vpclmulqdq), PerformanceTier::X86_64Avx512Vpclmulqdq ); - // Test AVX512 + PCLMULQDQ (mid-tier) on Rust 1.89+ + // Test AVX512 + PCLMULQDQ (mid-tier) let capabilities_avx512 = ArchCapabilities { has_aes: false, has_sha3: false, @@ -550,43 +426,12 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: false, - rust_version_supports_avx512: true, }; assert_eq!( select_performance_tier_for_test(&capabilities_avx512), PerformanceTier::X86_64Avx512Pclmulqdq ); - // Test VPCLMULQDQ + AVX512 (highest tier) on Rust < 1.89 - let capabilities_vpclmulqdq = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: true, - rust_version_supports_avx512: false, - }; - assert_eq!( - select_performance_tier_for_test(&capabilities_vpclmulqdq), - PerformanceTier::X86_64SsePclmulqdq - ); - - // Test AVX512 + PCLMULQDQ (mid-tier) on Rust < 1.89 - let capabilities_avx512 = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: false, - rust_version_supports_avx512: false, - }; - assert_eq!( - select_performance_tier_for_test(&capabilities_avx512), - PerformanceTier::X86_64SsePclmulqdq - ); - // Test SSE + PCLMULQDQ (baseline tier) let capabilities_sse = ArchCapabilities { has_aes: false, @@ -595,7 +440,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_sse), @@ -610,7 +454,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_no_pclmul), @@ -631,7 +474,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_sse), @@ -647,7 +489,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, // No AVX512 on 32-bit x86 has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // This should select x86_64 tier since we're testing the general case assert_eq!( @@ -663,7 +504,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&capabilities_no_pclmul), @@ -676,7 +516,7 @@ mod tests { // Test that x86 feature hierarchy is properly maintained // SSE4.1 is required for PCLMULQDQ // AVX512VL requires PCLMULQDQ - // VPCLMULQDQ requires AVX512VL and Rust 1.89+ + // VPCLMULQDQ requires AVX512VL // Test feature dependencies are enforced let capabilities_full = ArchCapabilities { @@ -686,7 +526,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: true, - rust_version_supports_avx512: true, }; // All x86 features should be available when hierarchy is satisfied @@ -694,46 +533,12 @@ mod tests { assert!(capabilities_full.has_pclmulqdq); assert!(capabilities_full.has_avx512vl); assert!(capabilities_full.has_vpclmulqdq); - assert!(capabilities_full.rust_version_supports_avx512); - } - - #[test] - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - fn test_rust_version_gating() { - // Test that VPCLMULQDQ is properly gated by Rust version - let rust_support = check_rust_version_supports_avx512(); - - // Should return a boolean based on Rust version - // This will be true for Rust 
1.89+ and false for earlier versions - assert!(rust_support == true || rust_support == false); } // Mock tests for compile-time and runtime feature agreement scenarios mod mock_feature_agreement_tests { use super::*; - #[test] - fn test_rust_version_gating_scenarios() { - // Test VPCLMULQDQ with different Rust version scenarios - - // All features available but Rust version too old - let capabilities_old_rust = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: true, // Hardware supports it - rust_version_supports_avx512: false, // But Rust version is too old - }; - - // Should not select VPCLMULQDQ or AVX512 tiers due to Rust version constraint - let tier = select_performance_tier_for_test(&capabilities_old_rust); - assert_ne!(tier, PerformanceTier::X86_64Avx512Vpclmulqdq); - assert_ne!(tier, PerformanceTier::X86_64Avx512Pclmulqdq); - assert_eq!(tier, PerformanceTier::X86_64SsePclmulqdq); - } - #[test] fn test_feature_dependency_validation() { // Test that feature dependencies are properly validated @@ -746,7 +551,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // Should fall back to software since AES is required for SHA3 @@ -763,7 +567,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, // Missing required dependency has_vpclmulqdq: true, // This should be impossible in real detection - rust_version_supports_avx512: true, }; // Should fall back to SSE tier since AVX512VL is required for VPCLMULQDQ @@ -790,7 +593,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&no_features), @@ -805,7 +607,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&aes_only), @@ -820,7 +621,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&aes_sha3), @@ -840,7 +640,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&no_features), @@ -855,7 +654,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&sse_only), @@ -870,29 +668,13 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&sse_pclmul), PerformanceTier::X86_64SsePclmulqdq ); - // SSE4.1 + PCLMULQDQ + AVX512VL but old Rust - should fall back to SSE tier - let avx512_pclmul_old_rust = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: false, - rust_version_supports_avx512: false, // Old Rust version - }; - assert_eq!( - select_performance_tier_for_test(&avx512_pclmul_old_rust), - PerformanceTier::X86_64SsePclmulqdq - ); - - // SSE4.1 + PCLMULQDQ + AVX512VL with new Rust - mid-tier + // SSE4.1 + PCLMULQDQ + AVX512VL - mid-tier let avx512_pclmul_new_rust = ArchCapabilities { has_aes: false, has_sha3: false, @@ -900,29 +682,13 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: false, - 
rust_version_supports_avx512: true, // New Rust version }; assert_eq!( select_performance_tier_for_test(&avx512_pclmul_new_rust), PerformanceTier::X86_64Avx512Pclmulqdq ); - // All features + old Rust - should fall back to SSE tier - let all_features_old_rust = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: true, - rust_version_supports_avx512: false, // Old Rust version - }; - assert_eq!( - select_performance_tier_for_test(&all_features_old_rust), - PerformanceTier::X86_64SsePclmulqdq - ); - - // All features + new Rust - highest tier + // All features - highest tier let all_features_new_rust = ArchCapabilities { has_aes: false, has_sha3: false, @@ -930,7 +696,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: true, - rust_version_supports_avx512: true, // New Rust version }; assert_eq!( select_performance_tier_for_test(&all_features_new_rust), @@ -950,7 +715,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; assert_eq!( select_performance_tier_for_test(&no_features), @@ -966,7 +730,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, // AVX512 not available on 32-bit x86 has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // The test function will return x86_64 tier since it doesn't distinguish architectures assert_eq!( @@ -1017,7 +780,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // Should select highest tier @@ -1053,7 +815,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: true, has_vpclmulqdq: true, - rust_version_supports_avx512: true, }; // Should select highest tier @@ -1091,27 +852,6 @@ mod tests { ); } - #[test] - fn test_rust_version_degradation() { - // Test degradation when Rust version doesn't support VPCLMULQDQ - - let capabilities_with_vpclmulqdq = ArchCapabilities { - has_aes: false, - has_sha3: false, - has_sse41: true, - has_pclmulqdq: true, - has_avx512vl: true, - has_vpclmulqdq: true, - rust_version_supports_avx512: false, // Old Rust version - }; - - // Should degrade from VPCLMULQDQ tier to SSE tier due to Rust version - assert_eq!( - select_performance_tier_for_test(&capabilities_with_vpclmulqdq), - PerformanceTier::X86_64SsePclmulqdq - ); - } - #[test] fn test_partial_feature_availability() { // Test scenarios where only some features in a tier are available @@ -1124,7 +864,6 @@ mod tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; // Should fall back to software since AES is required for SHA3 assert_eq!( @@ -1140,7 +879,6 @@ mod tests { has_pclmulqdq: true, has_avx512vl: false, has_vpclmulqdq: true, // This would be impossible in real detection - rust_version_supports_avx512: true, }; // Should fall back to SSE tier since AVX512VL is required for VPCLMULQDQ assert_eq!( @@ -1164,7 +902,6 @@ mod software_fallback_tests { has_pclmulqdq: false, has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; let tier = select_performance_tier_for_test(&capabilities_no_aes); @@ -1185,7 +922,6 @@ mod software_fallback_tests { has_pclmulqdq: false, // But PCLMULQDQ not available has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; let tier = select_performance_tier_for_test(&capabilities_no_pclmul); @@ -1203,7 +939,6 @@ mod software_fallback_tests { has_pclmulqdq: false, 
// PCLMULQDQ requires SSE4.1 has_avx512vl: false, has_vpclmulqdq: false, - rust_version_supports_avx512: false, }; let tier = select_performance_tier_for_test(&capabilities_no_sse); From 63489df3989cdbcb801376b952110a5dd706c832 Mon Sep 17 00:00:00 2001 From: Don MacAskill Date: Sat, 8 Nov 2025 21:27:56 -0800 Subject: [PATCH 2/3] Use is_multiple_of() --- src/bin/checksum.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/checksum.rs b/src/bin/checksum.rs index 962c790..8bcac41 100644 --- a/src/bin/checksum.rs +++ b/src/bin/checksum.rs @@ -409,7 +409,7 @@ fn format_number_with_commas(n: u64) -> String { let chars: Vec<char> = s.chars().collect(); for (i, ch) in chars.iter().enumerate() { - if i > 0 && (chars.len() - i) % 3 == 0 { + if i > 0 && (chars.len() - i).is_multiple_of(3) { result.push(','); } result.push(*ch); From 0e0db0b716f1be70f0eb803d524098de4a57c2da Mon Sep 17 00:00:00 2001 From: Don MacAskill Date: Sat, 8 Nov 2025 21:32:57 -0800 Subject: [PATCH 3/3] Fix typo --- src/arch/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 0d31dac..f090bcc 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -114,7 +114,7 @@ pub(crate) unsafe fn update(state: u64, bytes: &[u8], params: CrcParams) -> u64 }, ArchOpsInstance::SoftwareFallback => { #[cfg(target_arch = "x86")] - crate::arch::x86_software_update(state, bytes, params); + crate::arch::software::update(state, bytes, params); // This should never happen, but just in case panic!("x86 features missing (SSE4.1 && PCLMULQDQ)");