@@ -92,6 +92,13 @@ impl<'a> fmt::Debug for Unit<'a> {
9292 }
9393}
9494
95+ /// A small structure used to "intern" `Unit` values.
96+ ///
97+ /// A `Unit` is just a thin pointer to an internal `UnitInner`. This is done to
98+ /// ensure that `Unit` itself is quite small as well as enabling a very
99+ /// efficient hash/equality implementation for `Unit`. All units are
100+ /// manufactured through an interner which guarantees that each equivalent value
101+ /// is only produced once.
95102pub struct UnitInterner < ' a > {
96103 state : RefCell < InternerState < ' a > > ,
97104}
@@ -101,6 +108,7 @@ struct InternerState<'a> {
101108}
102109
103110impl < ' a > UnitInterner < ' a > {
111+ /// Creates a new blank interner.
104112 pub fn new ( ) -> UnitInterner < ' a > {
105113 UnitInterner {
106114 state : RefCell :: new ( InternerState {
@@ -109,6 +117,9 @@ impl<'a> UnitInterner<'a> {
109117 }
110118 }
111119
120+ /// Creates a new `Unit` from its components. The returned `Unit`'s fields
121+ /// will all be equivalent to the provided arguments, although they may not
122+ /// be the exact same instance.
112123 pub fn intern (
113124 & ' a self ,
114125 pkg : & ' a Package ,
@@ -127,9 +138,30 @@ impl<'a> UnitInterner<'a> {
127138 Unit { inner }
128139 }
129140
141+ // Ok so interning here is a little unsafe, hence the usage of `unsafe`
142+ // internally. The primary issue here is that we've got an internal cache of
143+ // `UnitInner` instances added so far, but we may need to mutate it to add
144+ // a new one, and the mutation for an interner happens behind a shared borrow.
145+ //
146+ // Our goal though is to extend a reference obtained under `borrow_mut` to
147+ // the lifetime of the borrow passed into this function. That's where `unsafe`
148+ // comes into play. What we're subverting here is resizing internally in the
149+ // `HashSet` as well as overwriting previous keys in the `HashSet`.
150+ //
151+ // As a result we store `Box<UnitInner>` internally to have an extra layer
152+ // of indirection. That way `*const UnitInner` is a stable address that
153+ // doesn't change with `HashSet` resizing. Furthermore we're careful to
154+ // never overwrite an entry once inserted.
155+ //
156+ // Ideally we'd use an off-the-shelf interner from crates.io which avoids a
157+ // small amount of unsafety here, but at the time this was written one
158+ // wasn't obviously available.
130159 fn intern_inner ( & ' a self , item : & UnitInner < ' a > ) -> & ' a UnitInner < ' a > {
131160 let mut me = self . state . borrow_mut ( ) ;
132161 if let Some ( item) = me. cache . get ( item) {
162+ // Note that `item` has type `&Box<UnitInner<'a>>`. Use `&**` to
163+ // convert that to `&UnitInner<'a>`, then do some trickery to extend
164+ // the lifetime to the `'a` on the function here.
133165 return unsafe { & * ( & * * item as * const UnitInner < ' a > ) } ;
134166 }
135167 me. cache . insert ( Box :: new ( item. clone ( ) ) ) ;
0 commit comments