|
2 | 2 | use crate::{Rounds, Variant}; |
3 | 3 | use core::marker::PhantomData; |
4 | 4 |
|
5 | | -#[cfg(feature = "rng")] |
6 | | -use crate::ChaChaCore; |
7 | | - |
8 | 5 | #[cfg(feature = "cipher")] |
9 | 6 | use crate::{STATE_WORDS, chacha::Block}; |
10 | 7 |
|
|
59 | 56 | } |
60 | 57 | } |
61 | 58 |
|
62 | | -#[inline] |
63 | | -#[target_feature(enable = "avx512f")] |
64 | | -#[cfg(feature = "rng")] |
65 | | -pub(crate) unsafe fn rng_inner<R, V>(core: &mut ChaChaCore<R, V>, buffer: &mut [u32; 64]) |
66 | | -where |
67 | | - R: Rounds, |
68 | | - V: Variant, |
69 | | -{ |
70 | | - use core::slice; |
71 | | - |
72 | | - use crate::rng::BLOCK_WORDS; |
73 | | - |
74 | | - let state_ptr = core.state.as_ptr() as *const __m128i; |
75 | | - let v = [ |
76 | | - _mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(0))), |
77 | | - _mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(1))), |
78 | | - _mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(2))), |
79 | | - ]; |
80 | | - let mut c = _mm512_broadcast_i32x4(_mm_loadu_si128(state_ptr.add(3))); |
81 | | - c = _mm512_add_epi64(c, _mm512_set_epi64(0, 3, 0, 2, 0, 1, 0, 0)); |
82 | | - let mut ctr = [c; MAX_N]; |
83 | | - for i in 0..MAX_N { |
84 | | - ctr[i] = c; |
85 | | - c = _mm512_add_epi64(c, _mm512_set_epi64(0, 4, 0, 4, 0, 4, 0, 4)); |
86 | | - } |
87 | | - let mut backend = Backend::<R, V> { |
88 | | - v, |
89 | | - ctr, |
90 | | - _pd: PhantomData, |
91 | | - }; |
92 | | - |
93 | | - let buffer = slice::from_raw_parts_mut( |
94 | | - buffer.as_mut_ptr().cast::<Block>(), |
95 | | - buffer.len() / BLOCK_WORDS as usize, |
96 | | - ); |
97 | | - backend.gen_par_ks_blocks_inner::<4, { 4 / BLOCKS_PER_VECTOR }>(buffer.try_into().unwrap()); |
98 | | - |
99 | | - core.state[12] = |
100 | | - _mm256_extract_epi32::<0>(_mm512_extracti32x8_epi32::<0>(backend.ctr[0])) as u32; |
101 | | - core.state[13] = |
102 | | - _mm256_extract_epi32::<1>(_mm512_extracti32x8_epi32::<0>(backend.ctr[0])) as u32; |
103 | | -} |
104 | | - |
105 | 59 | struct Backend<R: Rounds, V: Variant> { |
106 | 60 | state: [__m128i; 3], |
107 | 61 | ctr: __m128i, |
|
0 commit comments