Skip to content

Commit 20d700a

Browse files
committed
Test PAR_BLOCKS=8
New throughput results on Zen 4:

test chacha20_bench1_16b   ... bench:      25.53 ns/iter (+/- 0.75)  =  640 MB/s
test chacha20_bench2_256b  ... bench:     255.88 ns/iter (+/- 4.16)  = 1003 MB/s
test chacha20_bench3_1kib  ... bench:     192.76 ns/iter (+/- 4.15)  = 5333 MB/s
test chacha20_bench4_16kib ... bench:   2,873.78 ns/iter (+/- 62.99) = 5702 MB/s

This is a 3x regression for the 256-byte case, since an input of at least 512 bytes (8 parallel blocks of 64 bytes each) is now required to use the parallel path.
1 parent 7346b83 commit 20d700a

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

chacha20/src/backends/avx512.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ use crate::{STATE_WORDS, chacha::Block};
1111
#[cfg(feature = "cipher")]
1212
use cipher::{
1313
BlockSizeUser, ParBlocks, ParBlocksSizeUser, StreamCipherBackend, StreamCipherClosure,
14-
consts::{U4, U64},
14+
consts::{U8, U64},
1515
};
1616

1717
#[cfg(target_arch = "x86")]
@@ -20,7 +20,7 @@ use core::arch::x86::*;
2020
use core::arch::x86_64::*;
2121

2222
/// Number of blocks processed in parallel.
23-
const PAR_BLOCKS: usize = 4;
23+
const PAR_BLOCKS: usize = 8;
2424
/// Number of `__m512i` to store parallel blocks.
2525
const N: usize = PAR_BLOCKS / 4;
2626

@@ -125,7 +125,7 @@ impl<R: Rounds, V: Variant> BlockSizeUser for Backend<R, V> {
125125

126126
#[cfg(feature = "cipher")]
127127
impl<R: Rounds, V: Variant> ParBlocksSizeUser for Backend<R, V> {
128-
type ParBlocksSize = U4;
128+
type ParBlocksSize = U8;
129129
}
130130

131131
#[cfg(feature = "cipher")]

0 commit comments

Comments
 (0)