1- //! pwxform: parallel wide transformation
1+ //! pwxform stands for "parallel wide transformation", although it can as well be tuned to be as
2+ //! narrow as one 64-bit lane.
3+ //!
4+ //! It operates on 64-bit lanes which are designed to be grouped into wider "simple SIMD" lanes,
5+ //! which are in turn possibly grouped into an even wider "gather SIMD" vector.
26
37use crate :: {
48 salsa20,
59 util:: { slice_as_chunks_mut, xor} ,
610} ;
711
8- // These are tunable, but they must meet certain constraints.
12+ /// Number of 64-bit lanes per "simple SIMD" lane (requiring only arithmetic and bitwise operations
13+ /// on its 64-bit elements). Must be a power of 2.
914const PWXSIMPLE : usize = 2 ;
15+
16+ /// Number of parallel "simple SIMD" lanes per "gather SIMD" vector (requiring "S-box lookups" of
17+ /// values as wide as a "simple SIMD" lane from PWXgather typically non-contiguous memory
18+ /// locations). Must be a power of 2.
1019const PWXGATHER : usize = 4 ;
20+
21+ /// Number of sequential rounds of pwxform’s basic transformation. Must be a power of 2, plus 2
22+ /// (e.g. 3, 4, 6, 10).
1123const PWXROUNDS : usize = 6 ;
24+
25+ /// Number of S-box index bits, thereby controlling the size of each of pwxform’s two S-boxes
26+ /// (in "simple SIMD" wide elements).
1227const SWIDTH : usize = 8 ;
1328
14- // Derived values. Not tunable on their own.
1529const PWXBYTES : usize = PWXGATHER * PWXSIMPLE * 8 ;
1630const PWXWORDS : usize = PWXBYTES / size_of :: < u32 > ( ) ;
1731const SMASK : usize = ( ( 1 << SWIDTH ) - 1 ) * PWXSIMPLE * 8 ;
@@ -28,9 +42,13 @@ pub(crate) struct PwxformCtx<'a> {
2842}
2943
3044impl PwxformCtx < ' _ > {
31- /// Compute `B = BlockMix_pwxform{salsa20/2, ctx, r}(B)`.
45+ /// Compute `B = BlockMix_pwxform{salsa20/2, ctx, r}(B)`. Input `B` must be 128r bytes in length.
3246 ///
33- /// The input `B` must be 128r bytes in length.
47+ /// `BlockMix_pwxform` differs from scrypt’s `BlockMix` in that it doesn’t shuffle output
48+ /// sub-blocks, uses pwxform in place of Salsa20/8 for as long as sub-blocks processed with
49+ /// pwxform fit in the provided block B, and finally uses Salsa20/2 (that is, Salsa20 with only
50+ /// one double-round) to post-process the last sub-block output by pwxform (thereby finally
51+ /// mixing pwxform’s parallel lanes).
3452 pub ( crate ) fn blockmix_pwxform ( & mut self , b : & mut [ u32 ] , r : usize ) {
3553 // Convert 128-byte blocks to PWXbytes blocks
3654 // TODO(tarcieri): use upstream `[T]::as_chunks_mut` when MSRV is 1.88
0 commit comments