@@ -96,28 +96,30 @@ macro_rules! compress {
 unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) {
     debug_assert!(count <= 8);
 
-    if count == 8 {
-        ptr::copy_nonoverlapping(src, dst, 8);
-        return;
-    }
+    unsafe {
+        if count == 8 {
+            ptr::copy_nonoverlapping(src, dst, 8);
+            return;
+        }
 
-    let mut i = 0;
-    if i + 3 < count {
-        ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
-        i += 4;
-    }
+        let mut i = 0;
+        if i + 3 < count {
+            ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
+            i += 4;
+        }
 
-    if i + 1 < count {
-        ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
-        i += 2
-    }
+        if i + 1 < count {
+            ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
+            i += 2
+        }
 
-    if i < count {
-        *dst.add(i) = *src.add(i);
-        i += 1;
-    }
+        if i < count {
+            *dst.add(i) = *src.add(i);
+            i += 1;
+        }
 
-    debug_assert_eq!(i, count);
+        debug_assert_eq!(i, count);
+    }
 }
 
 // # Implementation
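The change in this hunk is mechanical: the body of the `unsafe fn` is wrapped in an explicit `unsafe { ... }` block instead of relying on the implicit one that `unsafe fn` historically provided. Below is a minimal sketch of the `unsafe_op_in_unsafe_fn` lint this pattern satisfies (warn-by-default in the 2024 edition); the `copy_small` function and `main` driver are illustrative, not from this file:

```rust
// Illustrative sketch, not code from this PR.
#![deny(unsafe_op_in_unsafe_fn)]

use std::ptr;

/// SAFETY: `src` and `dst` must be valid for `count` bytes and non-overlapping.
unsafe fn copy_small(src: *const u8, dst: *mut u8, count: usize) {
    // Without this block, the call below is rejected: under the lint, an
    // `unsafe fn` body no longer grants implicit permission for unsafe ops.
    unsafe {
        ptr::copy_nonoverlapping(src, dst, count);
    }
}

fn main() {
    let src = [1u8, 2, 3];
    let mut dst = [0u8; 3];
    // SAFETY: both buffers are 3 bytes long and distinct.
    unsafe { copy_small(src.as_ptr(), dst.as_mut_ptr(), 3) };
    assert_eq!(dst, [1, 2, 3]);
}
```

Wrapping the whole body, as this commit does, keeps the diff minimal; the block could later be narrowed to just the pointer operations if finer-grained auditing is wanted.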
@@ -232,38 +234,40 @@ impl SipHasher128 {
     // overflow) if it wasn't already.
     #[inline(never)]
     unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
-        let nbuf = self.nbuf;
-        debug_assert!(LEN <= 8);
-        debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + LEN >= BUFFER_SIZE);
-        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
+        unsafe {
+            let nbuf = self.nbuf;
+            debug_assert!(LEN <= 8);
+            debug_assert!(nbuf < BUFFER_SIZE);
+            debug_assert!(nbuf + LEN >= BUFFER_SIZE);
+            debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
+
+            // Copy first part of input into end of buffer, possibly into spill
+            // element. The memcpy call is optimized away because the size is known.
+            let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
+            ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
+
+            // Process buffer.
+            for i in 0..BUFFER_CAPACITY {
+                let elem = self.buf.get_unchecked(i).assume_init().to_le();
+                self.state.v3 ^= elem;
+                Sip13Rounds::c_rounds(&mut self.state);
+                self.state.v0 ^= elem;
+            }
 
-        // Copy first part of input into end of buffer, possibly into spill
-        // element. The memcpy call is optimized away because the size is known.
-        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-        ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
-
-        // Process buffer.
-        for i in 0..BUFFER_CAPACITY {
-            let elem = self.buf.get_unchecked(i).assume_init().to_le();
-            self.state.v3 ^= elem;
-            Sip13Rounds::c_rounds(&mut self.state);
-            self.state.v0 ^= elem;
+            // Copy remaining input into start of buffer by copying LEN - 1
+            // elements from spill (at most LEN - 1 bytes could have overflowed
+            // into the spill). The memcpy call is optimized away because the size
+            // is known. And the whole copy is optimized away for LEN == 1.
+            let dst = self.buf.as_mut_ptr() as *mut u8;
+            let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
+            ptr::copy_nonoverlapping(src, dst, LEN - 1);
+
+            // This function should only be called when the write fills the buffer.
+            // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
+            // LEN is statically known, so the branch is optimized away.
+            self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
+            self.processed += BUFFER_SIZE;
         }
-
-        // Copy remaining input into start of buffer by copying LEN - 1
-        // elements from spill (at most LEN - 1 bytes could have overflowed
-        // into the spill). The memcpy call is optimized away because the size
-        // is known. And the whole copy is optimized away for LEN == 1.
-        let dst = self.buf.as_mut_ptr() as *mut u8;
-        let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
-        ptr::copy_nonoverlapping(src, dst, LEN - 1);
-
-        // This function should only be called when the write fills the buffer.
-        // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
-        // LEN is statically known, so the branch is optimized away.
-        self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
-        self.processed += BUFFER_SIZE;
     }
 
     // A write function for byte slices.
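For reference, `short_write_process_buffer` relies on a spill element past the logical end of the buffer: a short write may run over the end, the full buffer is compressed, and the overflowed tail is moved back to the front. A safe sketch of that flow with made-up byte-level sizes (the real buffer holds `MaybeUninit<u64>` elements, so only the bookkeeping carries over):

```rust
/// Safe sketch of the spill-buffer flow; CAP and SPILL are made-up
/// byte counts, not constants from this file.
const CAP: usize = 8;   // bytes compressed per flush
const SPILL: usize = 7; // at most LEN - 1 <= 7 bytes can overflow

struct Buf {
    buf: [u8; CAP + SPILL],
    nbuf: usize,      // valid bytes currently buffered
    processed: usize, // bytes compressed so far
}

impl Buf {
    /// Called only when this write fills the buffer.
    fn short_write_flush<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        debug_assert!(LEN <= 8 && self.nbuf < CAP && self.nbuf + LEN >= CAP);
        // Copy past the logical end; the spill area absorbs the overflow,
        // so no bounds branch is needed on this path.
        self.buf[self.nbuf..self.nbuf + LEN].copy_from_slice(&bytes);
        // ... compress buf[..CAP] here ...
        // Move the overflowed tail back to the front. Copying a fixed
        // LEN - 1 bytes (rather than the exact overflow) mirrors the real
        // code: the count is statically known, so the copy is cheap.
        self.buf.copy_within(CAP..CAP + (LEN - 1), 0);
        self.nbuf = if LEN == 1 { 0 } else { self.nbuf + LEN - CAP };
        self.processed += CAP;
    }
}
```

The spill is what lets the first copy proceed unconditionally; everything that lands past `CAP` is recovered by the fixed-size `copy_within`.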
@@ -301,57 +305,59 @@ impl SipHasher128 {
     // containing the byte offset `self.nbuf`.
     #[inline(never)]
     unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
-        let length = msg.len();
-        let nbuf = self.nbuf;
-        debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + length >= BUFFER_SIZE);
-
-        // Always copy first part of input into current element of buffer.
-        // This function should only be called when the write fills the buffer,
-        // so we know that there is enough input to fill the current element.
-        let valid_in_elem = nbuf % ELEM_SIZE;
-        let needed_in_elem = ELEM_SIZE - valid_in_elem;
-
-        let src = msg.as_ptr();
-        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-        copy_nonoverlapping_small(src, dst, needed_in_elem);
-
-        // Process buffer.
+        unsafe {
+            let length = msg.len();
+            let nbuf = self.nbuf;
+            debug_assert!(nbuf < BUFFER_SIZE);
+            debug_assert!(nbuf + length >= BUFFER_SIZE);
+
+            // Always copy first part of input into current element of buffer.
+            // This function should only be called when the write fills the buffer,
+            // so we know that there is enough input to fill the current element.
+            let valid_in_elem = nbuf % ELEM_SIZE;
+            let needed_in_elem = ELEM_SIZE - valid_in_elem;
+
+            let src = msg.as_ptr();
+            let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
+            copy_nonoverlapping_small(src, dst, needed_in_elem);
+
+            // Process buffer.
+
+            // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) /
+            // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
+            // We know that is true, because last step ensured we have a full
+            // element in the buffer.
+            let last = nbuf / ELEM_SIZE + 1;
+
+            for i in 0..last {
+                let elem = self.buf.get_unchecked(i).assume_init().to_le();
+                self.state.v3 ^= elem;
+                Sip13Rounds::c_rounds(&mut self.state);
+                self.state.v0 ^= elem;
+            }
 
-        // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) /
-        // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
-        // We know that is true, because last step ensured we have a full
-        // element in the buffer.
-        let last = nbuf / ELEM_SIZE + 1;
+            // Process the remaining element-sized chunks of input.
+            let mut processed = needed_in_elem;
+            let input_left = length - processed;
+            let elems_left = input_left / ELEM_SIZE;
+            let extra_bytes_left = input_left % ELEM_SIZE;
+
+            for _ in 0..elems_left {
+                let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le();
+                self.state.v3 ^= elem;
+                Sip13Rounds::c_rounds(&mut self.state);
+                self.state.v0 ^= elem;
+                processed += ELEM_SIZE;
+            }
 
-        for i in 0..last {
-            let elem = self.buf.get_unchecked(i).assume_init().to_le();
-            self.state.v3 ^= elem;
-            Sip13Rounds::c_rounds(&mut self.state);
-            self.state.v0 ^= elem;
-        }
+            // Copy remaining input into start of buffer.
+            let src = msg.as_ptr().add(processed);
+            let dst = self.buf.as_mut_ptr() as *mut u8;
+            copy_nonoverlapping_small(src, dst, extra_bytes_left);
 
-        // Process the remaining element-sized chunks of input.
-        let mut processed = needed_in_elem;
-        let input_left = length - processed;
-        let elems_left = input_left / ELEM_SIZE;
-        let extra_bytes_left = input_left % ELEM_SIZE;
-
-        for _ in 0..elems_left {
-            let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le();
-            self.state.v3 ^= elem;
-            Sip13Rounds::c_rounds(&mut self.state);
-            self.state.v0 ^= elem;
-            processed += ELEM_SIZE;
+            self.nbuf = extra_bytes_left;
+            self.processed += nbuf + processed;
         }
-
-        // Copy remaining input into start of buffer.
-        let src = msg.as_ptr().add(processed);
-        let dst = self.buf.as_mut_ptr() as *mut u8;
-        copy_nonoverlapping_small(src, dst, extra_bytes_left);
-
-        self.nbuf = extra_bytes_left;
-        self.processed += nbuf + processed;
     }
 
     #[inline]
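The index arithmetic in `slice_write_process_buffer` is easy to misread, so here is a quick self-contained check of its invariants, with a toy `ELEM_SIZE` and loop bounds chosen to respect the function's "write fills the buffer" precondition (none of this is from the PR):

```rust
// Sketch verifying the bookkeeping identities; toy element size.
const ELEM_SIZE: usize = 8;

fn main() {
    for nbuf in 0..ELEM_SIZE {
        // The write must at least complete the current element.
        for length in (ELEM_SIZE - nbuf)..64 {
            let needed_in_elem = ELEM_SIZE - nbuf % ELEM_SIZE;
            let input_left = length - needed_in_elem;
            let elems_left = input_left / ELEM_SIZE;
            let extra_bytes_left = input_left % ELEM_SIZE;
            let processed = needed_in_elem + elems_left * ELEM_SIZE;
            // Every input byte is either processed or buffered for later...
            assert_eq!(processed + extra_bytes_left, length);
            // ...and the hasher only ever compresses whole elements.
            assert_eq!((nbuf + processed) % ELEM_SIZE, 0);
        }
    }
}
```

The second assertion is why `self.processed += nbuf + processed` stays element-aligned: `needed_in_elem` tops `nbuf` up to an exact multiple of `ELEM_SIZE` before any whole elements are consumed.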