Skip to content

Commit 9f4b9c9

Browse files
committed
Remove AVX-512 RNG backend, since RNG doesn't expose enough parallelism to make it worth the complexity
1 parent 0442f2f commit 9f4b9c9

File tree

3 files changed: 4 additions and 57 deletions.

chacha20/src/backends/avx512.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
use crate::{Rounds, Variant};
33
use core::marker::PhantomData;
44

5-
#[cfg(feature = "rng")]
6-
use crate::ChaChaCore;
7-
85
#[cfg(feature = "cipher")]
96
use crate::{STATE_WORDS, chacha::Block};
107

@@ -59,49 +56,6 @@ where
5956
}
6057
}
6158

62-
#[inline]
63-
#[target_feature(enable = "avx512f")]
64-
#[cfg(feature = "rng")]
65-
pub(crate) unsafe fn rng_inner<R, V>(core: &mut ChaChaCore<R, V>, buffer: &mut [u32; 64])
66-
where
67-
R: Rounds,
68-
V: Variant,
69-
{
70-
use core::slice;
71-
72-
use crate::rng::BLOCK_WORDS;
73-
74-
let state_ptr = core.state.as_ptr() as *const __m128i;
75-
let v = [
76-
_mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(0))),
77-
_mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(1))),
78-
_mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(2))),
79-
];
80-
let mut c = _mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(3)));
81-
c = _mm512_add_epi64(c, _mm512_set_epi64(0, 3, 0, 2, 0, 1, 0, 0));
82-
let mut ctr = [c; MAX_N];
83-
for i in 0..MAX_N {
84-
ctr[i] = c;
85-
c = _mm512_add_epi64(c, _mm512_set_epi64(0, 4, 0, 4, 0, 4, 0, 4));
86-
}
87-
let mut backend = Backend::<R, V> {
88-
v,
89-
ctr,
90-
_pd: PhantomData,
91-
};
92-
93-
let buffer = slice::from_raw_parts_mut(
94-
buffer.as_mut_ptr().cast::<Block>(),
95-
buffer.len() / BLOCK_WORDS as usize,
96-
);
97-
backend.gen_par_ks_blocks_inner::<4, { 4 / BLOCKS_PER_VECTOR }>(buffer.try_into().unwrap());
98-
99-
core.state[12] =
100-
_mm256_extract_epi32::<0>(_mm512_extracti32x8_epi32::<0>(backend.ctr[0])) as u32;
101-
core.state[13] =
102-
_mm256_extract_epi32::<1>(_mm512_extracti32x8_epi32::<0>(backend.ctr[0])) as u32;
103-
}
104-
10559
struct Backend<R: Rounds, V: Variant> {
10660
state: [__m128i; 3],
10761
ctr: __m128i,

chacha20/src/rng.rs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,7 @@ impl<R: Rounds, V: Variant> ChaChaCore<R, V> {
189189
backends::soft::Backend(self).gen_ks_blocks(buffer);
190190
} else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
191191
cfg_if! {
192-
if #[cfg(chacha20_force_avx512)] {
193-
unsafe {
194-
backends::avx512::rng_inner::<R, V>(self, buffer);
195-
}
196-
} else if #[cfg(chacha20_force_avx2)] {
192+
if #[cfg(chacha20_force_avx2)] {
197193
unsafe {
198194
backends::avx2::rng_inner::<R, V>(self, buffer);
199195
}
@@ -202,12 +198,8 @@ impl<R: Rounds, V: Variant> ChaChaCore<R, V> {
202198
backends::sse2::rng_inner::<R, V>(self, buffer);
203199
}
204200
} else {
205-
let (avx512_token, avx2_token, sse2_token) = self.tokens;
206-
if avx512_token.get() {
207-
unsafe {
208-
backends::avx512::rng_inner::<R, V>(self, buffer);
209-
}
210-
} else if avx2_token.get() {
201+
let (_avx512_token, avx2_token, sse2_token) = self.tokens;
202+
if avx2_token.get() {
211203
unsafe {
212204
backends::avx2::rng_inner::<R, V>(self, buffer);
213205
}

chacha20/tests/kats.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ mod chacha20test {
9898

9999
// Long input test to check the full parallel AVX-512 implementation.
100100
// Test data generated from random byte strings.
101+
#[cfg(feature = "cipher")]
101102
mod chacha20test_long {
102103
use chacha20::{ChaCha20, KeyIvInit};
103104
use cipher::StreamCipher;

0 commit comments

Comments
 (0)