33use std:: borrow:: Cow ;
44use std:: convert:: { TryFrom , TryInto } ;
55use std:: fmt;
6+ use std:: hash;
67use std:: iter;
78use std:: ops:: { Deref , Range } ;
89use std:: ptr;
@@ -25,7 +26,9 @@ use crate::ty;
2526/// Its public API is rather low-level, working directly with allocation offsets and a custom error
2627/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
2728/// module provides higher-level access.
28- #[ derive( Clone , Debug , Eq , PartialEq , PartialOrd , Ord , Hash , TyEncodable , TyDecodable ) ]
29+ // Note: `Hash` is not derived. For performance reasons when interning, some of the `Allocation`
30+ // fields can be partially hashed (see the `Hash` impl below for more details).
31+ #[ derive( Clone , Debug , Eq , PartialEq , PartialOrd , Ord , TyEncodable , TyDecodable ) ]
2932#[ derive( HashStable ) ]
3033pub struct Allocation < Tag = AllocId , Extra = ( ) > {
3134 /// The actual bytes of the allocation.
@@ -49,6 +52,46 @@ pub struct Allocation<Tag = AllocId, Extra = ()> {
4952 pub extra : Extra ,
5053}
5154
/// Upper bound, in bytes, on the size of each slice that gets hashed when a large buffer is only
/// partially hashed while interning an `Allocation` and its `InitMask`. Two such slices are
/// hashed per buffer: one at its start and one at its end. See the comments on the `Hash` impls
/// of these two types for more details.
const MAX_BYTES_TO_HASH: usize = 64;

/// Largest buffer length, in bytes, that is still hashed in full when interning. Anything longer
/// is partially hashed as 2 slices of `MAX_BYTES_TO_HASH` bytes each, which is why this
/// threshold is twice that size.
const MAX_HASHED_BUFFER_LEN: usize = MAX_BYTES_TO_HASH * 2;
65+
66+ // Const allocations are only hashed for interning. However, they can be large, making the hashing
67+ // expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
68+ // big buffers like the actual bytes of allocation. We can partially hash some fields when they're
69+ // large.
70+ impl hash:: Hash for Allocation {
71+ fn hash < H : hash:: Hasher > ( & self , state : & mut H ) {
72+ // Partially hash the `bytes` buffer when it is large. To limit collisions with common
73+ // prefixes and suffixes, we hash the length and some slices of the buffer.
74+ let byte_count = self . bytes . len ( ) ;
75+ if byte_count > MAX_HASHED_BUFFER_LEN {
76+ // Hash the buffer's length.
77+ byte_count. hash ( state) ;
78+
79+ // And its head and tail.
80+ self . bytes [ ..MAX_BYTES_TO_HASH ] . hash ( state) ;
81+ self . bytes [ byte_count - MAX_BYTES_TO_HASH ..] . hash ( state) ;
82+ } else {
83+ self . bytes . hash ( state) ;
84+ }
85+
86+ // Hash the other fields as usual.
87+ self . relocations . hash ( state) ;
88+ self . init_mask . hash ( state) ;
89+ self . align . hash ( state) ;
90+ self . mutability . hash ( state) ;
91+ self . extra . hash ( state) ;
92+ }
93+ }
94+
5295/// Interned types generally have an `Outer` type and an `Inner` type, where
5396/// `Outer` is a newtype around `Interned<Inner>`, and all the operations are
5497/// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an
@@ -665,13 +708,43 @@ type Block = u64;
665708
666709/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
667710/// is initialized. If it is `false`, the byte is uninitialized.
668- #[ derive( Clone , Debug , Eq , PartialEq , PartialOrd , Ord , Hash , TyEncodable , TyDecodable ) ]
711+ // Note: `Hash` is not derived. For performance reasons when interning, some of the `InitMask`
712+ // fields can be partially hashed (see the `Hash` impl below for more details).
713+ #[ derive( Clone , Debug , Eq , PartialEq , PartialOrd , Ord , TyEncodable , TyDecodable ) ]
669714#[ derive( HashStable ) ]
670715pub struct InitMask {
671716 blocks : Vec < Block > ,
672717 len : Size ,
673718}
674719
720+ // Const allocations are only hashed for interning. However, they can be large, making the hashing
721+ // expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
722+ // big buffers like the allocation's init mask. We can partially hash some fields when they're
723+ // large.
724+ impl hash:: Hash for InitMask {
725+ fn hash < H : hash:: Hasher > ( & self , state : & mut H ) {
726+ const MAX_BLOCKS_TO_HASH : usize = MAX_BYTES_TO_HASH / std:: mem:: size_of :: < Block > ( ) ;
727+ const MAX_BLOCKS_LEN : usize = MAX_HASHED_BUFFER_LEN / std:: mem:: size_of :: < Block > ( ) ;
728+
729+ // Partially hash the `blocks` buffer when it is large. To limit collisions with common
730+ // prefixes and suffixes, we hash the length and some slices of the buffer.
731+ let block_count = self . blocks . len ( ) ;
732+ if block_count > MAX_BLOCKS_LEN {
733+ // Hash the buffer's length.
734+ block_count. hash ( state) ;
735+
736+ // And its head and tail.
737+ self . blocks [ ..MAX_BLOCKS_TO_HASH ] . hash ( state) ;
738+ self . blocks [ block_count - MAX_BLOCKS_TO_HASH ..] . hash ( state) ;
739+ } else {
740+ self . blocks . hash ( state) ;
741+ }
742+
743+ // Hash the other fields as usual.
744+ self . len . hash ( state) ;
745+ }
746+ }
747+
675748impl InitMask {
676749 pub const BLOCK_SIZE : u64 = 64 ;
677750
0 commit comments