@@ -7,12 +7,34 @@ use std::ptr;
 #[cfg(test)]
 mod tests;
 
+// The SipHash algorithm operates on 8-byte chunks.
 const ELEM_SIZE: usize = mem::size_of::<u64>();
-const BUFFER_SIZE_ELEMS: usize = 8;
-const BUFFER_SIZE_BYTES: usize = BUFFER_SIZE_ELEMS * ELEM_SIZE;
-const BUFFER_SIZE_ELEMS_SPILL: usize = BUFFER_SIZE_ELEMS + 1;
-const BUFFER_SIZE_BYTES_SPILL: usize = BUFFER_SIZE_ELEMS_SPILL * ELEM_SIZE;
-const BUFFER_SPILL_INDEX: usize = BUFFER_SIZE_ELEMS_SPILL - 1;
+
+// Size of the buffer in number of elements, not including the spill.
+//
+// The selection of this size was guided by rustc-perf benchmark comparisons of
+// different buffer sizes. It should be periodically reevaluated as the compiler
+// implementation and input characteristics change.
+//
+// Using the same-sized buffer for everything we hash is a performance versus
+// complexity tradeoff. The ideal buffer size, and whether buffering should even
+// be used, depends on what is being hashed. It may be worth it to size the
+// buffer appropriately (perhaps by making SipHasher128 generic over the buffer
+// size) or disable buffering depending on what is being hashed. But at this
+// time, we use the same buffer size for everything.
+const BUFFER_CAPACITY: usize = 8;
+
+// Size of the buffer in bytes, not including the spill.
+const BUFFER_SIZE: usize = BUFFER_CAPACITY * ELEM_SIZE;
+
+// Size of the buffer in number of elements, including the spill.
+const BUFFER_WITH_SPILL_CAPACITY: usize = BUFFER_CAPACITY + 1;
+
+// Size of the buffer in bytes, including the spill.
+const BUFFER_WITH_SPILL_SIZE: usize = BUFFER_WITH_SPILL_CAPACITY * ELEM_SIZE;
+
+// Index of the spill element in the buffer.
+const BUFFER_SPILL_INDEX: usize = BUFFER_WITH_SPILL_CAPACITY - 1;
 
 #[derive(Debug, Clone)]
 #[repr(C)]
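
For concreteness, the renamed constants work out to the following values. This is an illustrative check only (it is not part of the patch), written in the style of the existing `tests` module:

    // Illustrative only: the concrete values implied by the constants above.
    #[test]
    fn buffer_constant_relationships() {
        assert_eq!(ELEM_SIZE, 8); // a SipHash element is a u64, i.e. 8 bytes
        assert_eq!(BUFFER_SIZE, 64); // 8 elements * 8 bytes
        assert_eq!(BUFFER_WITH_SPILL_CAPACITY, 9); // 8 elements + 1 spill element
        assert_eq!(BUFFER_WITH_SPILL_SIZE, 72); // 9 elements * 8 bytes
        assert_eq!(BUFFER_SPILL_INDEX, 8); // the spill is the last element
    }
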
@@ -22,10 +44,10 @@ pub struct SipHasher128 {
     // `processed`, and then repetition of that pattern until hashing is done.
     // This is the basis for the ordering of fields below. However, in practice
     // the cache miss-rate for data access is extremely low regardless of order.
-    nbuf: usize, // how many bytes in buf are valid
-    buf: [MaybeUninit<u64>; BUFFER_SIZE_ELEMS_SPILL], // unprocessed bytes le
-    state: State, // hash State
-    processed: usize, // how many bytes we've processed
+    nbuf: usize, // how many bytes in buf are valid
+    buf: [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY], // unprocessed bytes le
+    state: State, // hash State
+    processed: usize, // how many bytes we've processed
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -64,13 +86,18 @@ macro_rules! compress {
 // Copies up to 8 bytes from source to destination. This performs better than
 // `ptr::copy_nonoverlapping` on microbenchmarks and may perform better on real
 // workloads since all of the copies have fixed sizes and avoid calling memcpy.
+//
+// This is specifically designed for copies of up to 8 bytes, because that's the
+// maximum number of bytes needed to fill an 8-byte-sized element on which
+// SipHash operates. Note that for variable-sized copies which are known to be
+// less than 8 bytes, this function will perform more work than necessary unless
+// the compiler is able to optimize the extra work away.
 #[inline]
 unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) {
-    const COUNT_MAX: usize = 8;
-    debug_assert!(count <= COUNT_MAX);
+    debug_assert!(count <= 8);
 
-    if count == COUNT_MAX {
-        ptr::copy_nonoverlapping(src, dst, COUNT_MAX);
+    if count == 8 {
+        ptr::copy_nonoverlapping(src, dst, 8);
         return;
     }
 
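
The rest of this function falls outside the hunk, but the comment above describes the technique: sub-8-byte counts are handled with a few fixed-size copies instead of one variable-length memcpy. A hypothetical sketch of that idea (the helper name and exact structure are assumptions, not the file's own code):

    // Hypothetical illustration of copying `count < 8` bytes using only
    // fixed-size copies (4, 2, and 1 bytes), so no call to memcpy is emitted.
    unsafe fn copy_up_to_7(src: *const u8, dst: *mut u8, count: usize) {
        debug_assert!(count < 8);
        let mut i = 0;
        if i + 3 < count {
            ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
            i += 4;
        }
        if i + 1 < count {
            ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
            i += 2;
        }
        if i < count {
            *dst.add(i) = *src.add(i);
            i += 1;
        }
        debug_assert_eq!(i, count);
    }
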
@@ -116,10 +143,13 @@ unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize)
 // The buffer includes a "spill"--an extra element at the end--which simplifies
 // the integer write buffer processing path. The value that fills the buffer can
 // be written with a statically sized write that may spill over into the spill.
-// After the buffer is processed, the part of the value that spilled over can
+// After the buffer is processed, the part of the value that spilled over can be
 // written from the spill to the beginning of the buffer with another statically
-// sized write. Due to static sizes, this scheme performs better than copying
-// the exact number of bytes needed into the end and beginning of the buffer.
+// sized write. This write may copy more bytes than actually spilled over, but
+// we maintain the metadata such that any extra copied bytes will be ignored by
+// subsequent processing. Due to the static sizes, this scheme performs better
+// than copying the exact number of bytes needed into the end and beginning of
+// the buffer.
 //
 // The buffer is uninitialized, which improves performance, but may preclude
 // efficient implementation of alternative approaches. The improvement is not so
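
As a concrete illustration of the spill scheme described above (a hypothetical trace, not text from the patch), suppose 60 bytes are already buffered (`nbuf == 60`) when an 8-byte value is written:

    // Hypothetical trace for nbuf == 60 and an 8-byte write:
    //
    // - The value is written at byte offset 60 with one fixed-size 8-byte copy:
    //   bytes 60..64 fill the last regular element, bytes 64..68 land in the
    //   spill element.
    // - All BUFFER_CAPACITY (8) regular elements are fed to SipHash.
    // - The entire 8-byte spill element is copied back to element 0 with another
    //   fixed-size copy; only its first 4 bytes are meaningful.
    // - Bookkeeping: nbuf becomes 60 + 8 - BUFFER_SIZE = 4 and processed grows by
    //   BUFFER_SIZE (64), so the extra copied bytes are ignored from then on.
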
@@ -142,12 +172,12 @@ unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize)
 //
 // In order to make `SipHasher128` consistent with `SipHasher` in libstd, we
 // choose to do the integer to byte sequence conversion in the platform-
-// dependent way. Clients can achieve (nearly) platform-independent hashing by
-// widening `isize` and `usize` integers to 64 bits on 32-bit systems and
-// byte-swapping integers on big-endian systems before passing them to the
-// writing functions. This causes the input byte sequence to look identical on
-// big- and little- endian systems (supposing `isize` and `usize` values can be
-// represented in 32 bits), which ensures platform-independent results.
+// dependent way. Clients can achieve platform-independent hashing by widening
+// `isize` and `usize` integers to 64 bits on 32-bit systems and byte-swapping
+// integers on big-endian systems before passing them to the writing functions.
+// This causes the input byte sequence to look identical on big- and little-
+// endian systems (supposing `isize` and `usize` values can be represented in 32
+// bits), which ensures platform-independent results.
 impl SipHasher128 {
     #[inline]
     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
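
A minimal sketch of the client-side normalization described above (hypothetical code, not part of this file; it assumes the standard `Hasher` impl that `SipHasher128` provides):

    use std::hash::Hasher;

    // Widen to 64 bits, then byte-swap on big-endian targets so the byte
    // sequence fed to the hasher is identical on every platform.
    fn write_usize_portable(hasher: &mut SipHasher128, x: usize) {
        hasher.write_u64((x as u64).to_le());
    }
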
@@ -178,10 +208,10 @@ impl SipHasher128 {
         let size = mem::size_of::<T>();
         let nbuf = self.nbuf;
         debug_assert!(size <= 8);
-        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
-        debug_assert!(nbuf + size < BUFFER_SIZE_BYTES_SPILL);
+        debug_assert!(nbuf < BUFFER_SIZE);
+        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
 
-        if nbuf + size < BUFFER_SIZE_BYTES {
+        if nbuf + size < BUFFER_SIZE {
             unsafe {
                 // The memcpy call is optimized away because the size is known.
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
@@ -207,17 +237,17 @@ impl SipHasher128 {
         let size = mem::size_of::<T>();
         let nbuf = self.nbuf;
         debug_assert!(size <= 8);
-        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
-        debug_assert!(nbuf + size >= BUFFER_SIZE_BYTES);
-        debug_assert!(nbuf + size < BUFFER_SIZE_BYTES_SPILL);
+        debug_assert!(nbuf < BUFFER_SIZE);
+        debug_assert!(nbuf + size >= BUFFER_SIZE);
+        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
 
         // Copy first part of input into end of buffer, possibly into spill
         // element. The memcpy call is optimized away because the size is known.
         let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
         ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
 
         // Process buffer.
-        for i in 0..BUFFER_SIZE_ELEMS {
+        for i in 0..BUFFER_CAPACITY {
             let elem = self.buf.get_unchecked(i).assume_init().to_le();
             self.state.v3 ^= elem;
             Sip24Rounds::c_rounds(&mut self.state);
@@ -234,18 +264,18 @@ impl SipHasher128 {
         // This function should only be called when the write fills the buffer.
         // Therefore, when size == 1, the new `self.nbuf` must be zero. The size
         // is statically known, so the branch is optimized away.
-        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE_BYTES };
-        self.processed += BUFFER_SIZE_BYTES;
+        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE };
+        self.processed += BUFFER_SIZE;
     }
 
     // A write function for byte slices.
     #[inline]
     fn slice_write(&mut self, msg: &[u8]) {
         let length = msg.len();
         let nbuf = self.nbuf;
-        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
+        debug_assert!(nbuf < BUFFER_SIZE);
 
-        if nbuf + length < BUFFER_SIZE_BYTES {
+        if nbuf + length < BUFFER_SIZE {
             unsafe {
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
 
@@ -275,8 +305,8 @@ impl SipHasher128 {
     unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
         let length = msg.len();
         let nbuf = self.nbuf;
-        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
-        debug_assert!(nbuf + length >= BUFFER_SIZE_BYTES);
+        debug_assert!(nbuf < BUFFER_SIZE);
+        debug_assert!(nbuf + length >= BUFFER_SIZE);
 
         // Always copy first part of input into current element of buffer.
         // This function should only be called when the write fills the buffer,
@@ -328,7 +358,7 @@ impl SipHasher128 {
 
     #[inline]
     pub fn finish128(mut self) -> (u64, u64) {
-        debug_assert!(self.nbuf < BUFFER_SIZE_BYTES);
+        debug_assert!(self.nbuf < BUFFER_SIZE);
 
         // Process full elements in buffer.
         let last = self.nbuf / ELEM_SIZE;
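
The renaming does not change behavior: a caller still drives the hasher through the standard `Hasher` write methods and then calls `finish128`. A minimal usage sketch (illustrative only; arbitrary keys and input, and it assumes the `Hasher` impl provided elsewhere in this file):

    use std::hash::Hasher;

    fn hash_example() -> (u64, u64) {
        let mut hasher = SipHasher128::new_with_keys(0, 0);
        hasher.write(b"some bytes");
        hasher.write_u64(42);
        // finish128 consumes the hasher and returns the 128-bit hash as two u64s.
        hasher.finish128()
    }
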