@@ -333,6 +333,12 @@ pub trait Hasher {
333333 ///
334334 /// println!("Hash is {:x}!", hasher.finish());
335335 /// ```
336+ ///
337+ /// # Note to Implementers
338+ ///
339+ /// You generally should not do length-prefixing as part of implementing
340+ /// this method. It's up to the [`Hash`] implementation to call
341+ /// [`Hasher::write_length_prefix`] before sequences that need it.
336342 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
337343 fn write ( & mut self , bytes : & [ u8 ] ) ;
338344
@@ -409,6 +415,96 @@ pub trait Hasher {
409415 fn write_isize ( & mut self , i : isize ) {
410416 self . write_usize ( i as usize )
411417 }
418+
419+ /// Writes a length prefix into this hasher, as part of being prefix-free.
420+ ///
421+ /// If you're implementing [`Hash`] for a custom collection, call this before
422+ /// writing its contents to this `Hasher`. That way
423+ /// `(collection![1, 2, 3], collection![4, 5])` and
424+ /// `(collection![1, 2], collection![3, 4, 5])` will provide different
425+ /// sequences of values to the `Hasher`.
426+ ///
427+ /// The `impl<T> Hash for [T]` includes a call to this method, so if you're
428+ /// hashing a slice (or array or vector) via its `Hash::hash` method,
429+ /// you should **not** call this yourself.
430+ ///
431+ /// This method is only for providing domain separation. If you want to
432+ /// hash a `usize` that represents part of the *data*, then it's important
433+ /// that you pass it to [`Hasher::write_usize`] instead of to this method.
434+ ///
435+ /// # Examples
436+ ///
437+ /// ```
438+ /// #![feature(hasher_prefixfree_extras)]
439+ /// # // Stubs to make the `impl` below pass the compiler
440+ /// # struct MyCollection<T>(Option<T>);
441+ /// # impl<T> MyCollection<T> {
442+ /// # fn len(&self) -> usize { todo!() }
443+ /// # }
444+ /// # impl<'a, T> IntoIterator for &'a MyCollection<T> {
445+ /// # type Item = T;
446+ /// # type IntoIter = std::iter::Empty<T>;
447+ /// # fn into_iter(self) -> Self::IntoIter { todo!() }
448+ /// # }
449+ ///
450+ /// use std::hash::{Hash, Hasher};
451+ /// impl<T: Hash> Hash for MyCollection<T> {
452+ /// fn hash<H: Hasher>(&self, state: &mut H) {
453+ /// state.write_length_prefix(self.len());
454+ /// for elt in self {
455+ /// elt.hash(state);
456+ /// }
457+ /// }
458+ /// }
459+ /// ```
460+ ///
461+ /// # Note to Implementers
462+ ///
463+ /// If you've decided that your `Hasher` is willing to be susceptible to
464+ /// Hash-DoS attacks, then, in the name of increased performance, you might
465+ /// consider skipping the hashing of some or all of the provided `len`.
466+ #[ inline]
467+ #[ unstable( feature = "hasher_prefixfree_extras" , issue = "96762" ) ]
468+ fn write_length_prefix ( & mut self , len : usize ) {
469+ self . write_usize ( len) ; // default: feed the length through `write_usize`; implementers may override
470+ }
471+
472+ /// Writes a single `str` into this hasher.
473+ ///
474+ /// If you're implementing [`Hash`], you generally do not need to call this,
475+ /// as the `impl Hash for str` already does, so you should prefer that instead.
476+ ///
477+ /// This includes the domain separator for prefix-freedom, so you should
478+ /// **not** call [`Self::write_length_prefix`] before calling this.
479+ ///
480+ /// # Note to Implementers
481+ ///
482+ /// The default implementation of this method includes a call to
483+ /// [`Self::write_length_prefix`], so if your implementation of `Hasher`
484+ /// doesn't care about prefix-freedom and you've thus overridden
485+ /// that method to do nothing, there's no need to override this one.
486+ ///
487+ /// This method is available to be overridden separately from the others
488+ /// as `str` being UTF-8 means that it never contains `0xFF` bytes, which
489+ /// can be used to provide prefix-freedom cheaper than hashing a length.
490+ ///
491+ /// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating
492+ /// them into a buffer), then you can hash the bytes of the `str` followed
493+ /// by a single `0xFF` byte.
494+ ///
495+ /// If your `Hasher` works in chunks, you can also do this by being careful
496+ /// about how you pad partial chunks. If the chunks are padded with `0x00`
497+ /// bytes then just hashing an extra `0xFF` byte doesn't necessarily
498+ /// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash
499+ /// the same sequence of chunks. But if you pad with `0xFF` bytes instead,
500+ /// ensuring at least one padding byte, then it can often provide
501+ /// prefix-freedom cheaper than hashing the length would.
502+ #[ inline]
503+ #[ unstable( feature = "hasher_prefixfree_extras" , issue = "96762" ) ]
504+ fn write_str ( & mut self , s : & str ) {
505+ self . write_length_prefix ( s. len ( ) ) ; // byte length goes first, acting as the domain separator
506+ self . write ( s. as_bytes ( ) ) ; // then the raw UTF-8 bytes of the string
507+ }
412508}
413509
414510#[ stable( feature = "indirect_hasher_impl" , since = "1.22.0" ) ]
@@ -455,6 +551,12 @@ impl<H: Hasher + ?Sized> Hasher for &mut H {
455551 fn write_isize ( & mut self , i : isize ) {
456552 ( * * self ) . write_isize ( i)
457553 }
554+ fn write_length_prefix ( & mut self , len : usize ) {
555+ ( * * self ) . write_length_prefix ( len) // delegate to the wrapped `H` rather than the trait default
556+ }
557+ fn write_str ( & mut self , s : & str ) {
558+ ( * * self ) . write_str ( s) // delegate to the wrapped `H` rather than the trait default
559+ }
458560}
459561
460562/// A trait for creating instances of [`Hasher`].
@@ -709,8 +811,7 @@ mod impls {
709811 impl Hash for str {
710812 #[ inline]
711813 fn hash < H : Hasher > ( & self , state : & mut H ) {
712- state. write ( self . as_bytes ( ) ) ;
713- state. write_u8 ( 0xff )
814+ state. write_str ( self ) ; // the hasher now decides how the string is made prefix-free
714815 }
715816 }
716817
@@ -767,7 +868,7 @@ mod impls {
767868 impl < T : Hash > Hash for [ T ] {
768869 #[ inline]
769870 fn hash < H : Hasher > ( & self , state : & mut H ) {
770- self . len ( ) . hash ( state ) ;
871+ state . write_length_prefix ( self . len ( ) ) ; // length as a domain separator, not as data
771872 Hash :: hash_slice ( self , state)
772873 }
773874 }
0 commit comments