//! Argon2 memory block functions

use core::{
+    convert::{AsMut, AsRef},
    num::Wrapping,
-    ops::{BitXor, BitXorAssign, Index, IndexMut},
-    slice,
+    ops::{BitXor, BitXorAssign},
};

#[cfg(feature = "zeroize")]
use zeroize::Zeroize;

-/// Structure for the (1KB) memory block implemented as 128 64-bit words.
-#[derive(Copy, Clone, Debug)]
-pub struct Block([u64; Self::SIZE / 8]);
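+/// Mask that keeps only the low 32 bits of a 64-bit word.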
+const TRUNC: u64 = u32::MAX as u64;
+
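+/// Quarter-round of the Argon2 permutation: the BLAKE2b mixing function G with
+/// each addition replaced by `a + b + 2 * lo32(a) * lo32(b)` (all wrapping).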
+macro_rules! permutate_step {
+    ($a:expr, $b:expr, $c:expr, $d:expr) => {
+        $a =
+            (Wrapping($a) + Wrapping($b) + (Wrapping(2) * Wrapping(($a & TRUNC) * ($b & TRUNC)))).0;
+        $d = ($d ^ $a).rotate_right(32);
+        $c =
+            (Wrapping($c) + Wrapping($d) + (Wrapping(2) * Wrapping(($c & TRUNC) * ($d & TRUNC)))).0;
+        $b = ($b ^ $c).rotate_right(24);
+
+        $a =
+            (Wrapping($a) + Wrapping($b) + (Wrapping(2) * Wrapping(($a & TRUNC) * ($b & TRUNC)))).0;
+        $d = ($d ^ $a).rotate_right(16);
+        $c =
+            (Wrapping($c) + Wrapping($d) + (Wrapping(2) * Wrapping(($c & TRUNC) * ($d & TRUNC)))).0;
+        $b = ($b ^ $c).rotate_right(63);
+    };
+}

-impl Default for Block {
-    fn default() -> Self {
-        Self([0u64; Self::SIZE / 8])
-    }
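+/// The Argon2 permutation P: one BLAKE2b-style round (column step, then
+/// diagonal step) over sixteen 64-bit words.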
+macro_rules! permutate {
+    (
+        $v0:expr, $v1:expr, $v2:expr, $v3:expr,
+        $v4:expr, $v5:expr, $v6:expr, $v7:expr,
+        $v8:expr, $v9:expr, $v10:expr, $v11:expr,
+        $v12:expr, $v13:expr, $v14:expr, $v15:expr,
+    ) => {
+        permutate_step!($v0, $v4, $v8, $v12);
+        permutate_step!($v1, $v5, $v9, $v13);
+        permutate_step!($v2, $v6, $v10, $v14);
+        permutate_step!($v3, $v7, $v11, $v15);
+        permutate_step!($v0, $v5, $v10, $v15);
+        permutate_step!($v1, $v6, $v11, $v12);
+        permutate_step!($v2, $v7, $v8, $v13);
+        permutate_step!($v3, $v4, $v9, $v14);
+    };
}

+/// Structure for the (1 KiB) memory block implemented as 128 64-bit words.
+#[derive(Copy, Clone, Debug)]
+#[repr(align(64))]
+pub struct Block([u64; Self::SIZE / 8]);
+
impl Block {
    /// Memory block size in bytes
    pub const SIZE: usize = 1024;

-    /// Load a block from a block-sized byte slice
-    pub(crate) fn load(&mut self, input: &[u8]) {
-        debug_assert_eq!(input.len(), Block::SIZE);
-
-        for (i, chunk) in input.chunks(8).enumerate() {
-            self[i] = u64::from_le_bytes(chunk.try_into().unwrap());
-        }
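+    /// Borrow the block's contents as a flat byte array.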
+    pub(crate) fn as_bytes(&self) -> &[u8; Self::SIZE] {
+        let ptr = self.0.as_ptr() as *const u8;
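+        // SAFETY: `self.0` is `[u64; 128]`, i.e. exactly `Self::SIZE` bytes,
+        // and any alignment is valid for `u8`, so the byte view stays in bounds.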
+        let slice = unsafe { core::slice::from_raw_parts(ptr, Self::SIZE) };
+        slice.try_into().unwrap()
    }

-    /// Iterate over the `u64` values contained in this block
-    pub(crate) fn iter(&self) -> slice::Iter<'_, u64> {
-        self.0.iter()
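+    /// Mutably borrow the block's contents as a flat byte array.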
+    pub(crate) fn as_mut_bytes(&mut self) -> &mut [u8; Self::SIZE] {
+        let ptr = self.0.as_mut_ptr() as *mut u8;
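+        // SAFETY: same layout argument as in `as_bytes`; the mutable borrow of
+        // `self` keeps the byte view unique for its lifetime.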
+        let slice = unsafe { core::slice::from_raw_parts_mut(ptr, Self::SIZE) };
+        slice.try_into().unwrap()
    }

-    /// Iterate mutably over the `u64` values contained in this block
-    pub(crate) fn iter_mut(&mut self) -> slice::IterMut<'_, u64> {
-        self.0.iter_mut()
-    }
-
-    /// Function fills a new memory block and optionally XORs the old block over the new one.
-    // TODO(tarcieri): optimized implementation (i.e. from opt.c instead of ref.c)
-    pub(crate) fn fill_block(&mut self, prev_block: Block, ref_block: Block, with_xor: bool) {
-        let mut block_r = ref_block ^ prev_block;
-        let mut block_tmp = block_r;
-
-        // Now block_r = ref_block + prev_block and block_tmp = ref_block + prev_block
-        if with_xor {
-            // Saving the next block contents for XOR over
-            block_tmp ^= *self;
-            // Now block_r = ref_block + prev_block and
-            // block_tmp = ref_block + prev_block + next_block
-        }
-
-        /// Note: designed by the Lyra PHC team
-        fn blake2_mult(x: u64, y: u64) -> u64 {
-            let m = 0xFFFFFFFF;
-            let xy = Wrapping((x & m) * (y & m)) * Wrapping(2);
-            (Wrapping(x) + Wrapping(y) + xy).0
-        }
-
-        /// Blake2 round function
-        macro_rules! blake2_round {
-            (
-                $v0:expr, $v1:expr, $v2:expr, $v3:expr, $v4:expr, $v5:expr, $v6:expr, $v7:expr,
-                $v8:expr, $v9:expr, $v10:expr, $v11:expr, $v12:expr, $v13:expr, $v14:expr, $v15:expr
-            ) => {
-                blake2_inner!($v0, $v4, $v8, $v12);
-                blake2_inner!($v1, $v5, $v9, $v13);
-                blake2_inner!($v2, $v6, $v10, $v14);
-                blake2_inner!($v3, $v7, $v11, $v15);
-                blake2_inner!($v0, $v5, $v10, $v15);
-                blake2_inner!($v1, $v6, $v11, $v12);
-                blake2_inner!($v2, $v7, $v8, $v13);
-                blake2_inner!($v3, $v4, $v9, $v14);
-            };
-        }
-
-        macro_rules! blake2_inner {
-            ($a:expr, $b:expr, $c:expr, $d:expr) => {
-                $a = blake2_mult($a, $b);
-                $d = ($d ^ $a).rotate_right(32);
-                $c = blake2_mult($c, $d);
-                $b = ($b ^ $c).rotate_right(24);
-                $a = blake2_mult($a, $b);
-                $d = ($d ^ $a).rotate_right(16);
-                $c = blake2_mult($c, $d);
-                $b = ($b ^ $c).rotate_right(63);
-            };
-        }
-
-        // Apply Blake2 on columns of 64-bit words: (0, 1, ..., 15), then
-        // (16, 17, ..31)... finally (112, 113, ...127)
-        for i in 0..8 {
-            blake2_round!(
-                block_r[16 * i],
-                block_r[16 * i + 1],
-                block_r[16 * i + 2],
-                block_r[16 * i + 3],
-                block_r[16 * i + 4],
-                block_r[16 * i + 5],
-                block_r[16 * i + 6],
-                block_r[16 * i + 7],
-                block_r[16 * i + 8],
-                block_r[16 * i + 9],
-                block_r[16 * i + 10],
-                block_r[16 * i + 11],
-                block_r[16 * i + 12],
-                block_r[16 * i + 13],
-                block_r[16 * i + 14],
-                block_r[16 * i + 15]
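+    /// Compress two blocks into a new one (the Argon2 compression function G):
+    /// R = rhs ^ lhs is permuted row-wise, then column-wise, and the permuted
+    /// value is finally XORed back with R.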
+    pub(crate) fn compress(rhs: &Self, lhs: &Self) -> Self {
+        let r = *rhs ^ lhs;
+
+        // Apply permutations rowwise
+        let mut q = r;
+        for chunk in q.0.chunks_exact_mut(16) {
+            #[rustfmt::skip]
+            permutate!(
+                chunk[0], chunk[1], chunk[2], chunk[3],
+                chunk[4], chunk[5], chunk[6], chunk[7],
+                chunk[8], chunk[9], chunk[10], chunk[11],
+                chunk[12], chunk[13], chunk[14], chunk[15],
            );
        }

-        // Apply Blake2 on rows of 64-bit words: (0, 1, 16, 17, ...112, 113), then
-        // (2, 3, 18, 19, ..., 114, 115).. finally (14, 15, 30, 31, ..., 126, 127)
+        // Apply permutations columnwise
        for i in 0..8 {
-            blake2_round!(
-                block_r[2 * i],
-                block_r[2 * i + 1],
-                block_r[2 * i + 16],
-                block_r[2 * i + 17],
-                block_r[2 * i + 32],
-                block_r[2 * i + 33],
-                block_r[2 * i + 48],
-                block_r[2 * i + 49],
-                block_r[2 * i + 64],
-                block_r[2 * i + 65],
-                block_r[2 * i + 80],
-                block_r[2 * i + 81],
-                block_r[2 * i + 96],
-                block_r[2 * i + 97],
-                block_r[2 * i + 112],
-                block_r[2 * i + 113]
+            let b = i * 2;
+
+            #[rustfmt::skip]
+            permutate!(
+                q.0[b], q.0[b + 1],
+                q.0[b + 16], q.0[b + 17],
+                q.0[b + 32], q.0[b + 33],
+                q.0[b + 48], q.0[b + 49],
+                q.0[b + 64], q.0[b + 65],
+                q.0[b + 80], q.0[b + 81],
+                q.0[b + 96], q.0[b + 97],
+                q.0[b + 112], q.0[b + 113],
            );
        }

-        *self = block_tmp ^ block_r;
+        q ^= &r;
+        q
    }
}

-impl BitXor for Block {
-    type Output = Self;
+impl Default for Block {
+    fn default() -> Self {
+        Self([0u64; Self::SIZE / 8])
+    }
+}

-    fn bitxor(self, rhs: Self) -> Self::Output {
-        let mut res = self;
-        res ^= rhs;
-        res
+impl AsRef<[u64]> for Block {
+    fn as_ref(&self) -> &[u64] {
+        &self.0
    }
}

-impl BitXorAssign for Block {
-    fn bitxor_assign(&mut self, rhs: Self) {
-        for (a, b) in self.iter_mut().zip(rhs.iter()) {
-            *a ^= *b;
-        }
+impl AsMut<[u64]> for Block {
+    fn as_mut(&mut self) -> &mut [u64] {
+        &mut self.0
    }
}

-impl Index<usize> for Block {
-    type Output = u64;
+impl BitXor<&Block> for Block {
+    type Output = Block;

-    fn index(&self, index: usize) -> &u64 {
-        &self.0[index]
+    fn bitxor(mut self, rhs: &Block) -> Self::Output {
+        self ^= rhs;
+        self
    }
}

-impl IndexMut<usize> for Block {
-    fn index_mut(&mut self, index: usize) -> &mut u64 {
-        &mut self.0[index]
+impl BitXorAssign<&Block> for Block {
+    fn bitxor_assign(&mut self, rhs: &Block) {
+        for (dst, src) in self.0.iter_mut().zip(rhs.0.iter().copied()) {
+            *dst ^= src;
+        }
    }
}