@@ -7,7 +7,7 @@ use serde::Serialize;
77
88use crate :: bfield_member:: { BFieldLookup , BFieldMember , BFieldVal } ;
99
10- /// The struct holding the various bfields
10+ /// The `struct` holding the `BField` primary and secondary bit arrays.
1111pub struct BField < T > {
1212 members : Vec < BFieldMember < T > > ,
1313 read_only : bool ,
@@ -18,18 +18,26 @@ unsafe impl<T> Send for BField<T> {}
1818unsafe impl < T > Sync for BField < T > { }
1919
2020impl < T : Clone + DeserializeOwned + Serialize > BField < T > {
21- /// The (complicated) method to create a bfield.
22- /// The bfield files will be created in `directory` with the given `filename` and the
23- /// suffixes `(0..n_secondaries).bfd`
24- /// `size` is the primary bfield size, subsequent bfield sizes will be determined by
25- /// `secondary_scaledown` and `max_scaledown`.
26- /// If you set `in_memory` to true, remember to call `persist_to_disk` when it's built to
21+ /// A (rather complex) method for creating a `BField`.
22+ ///
23+ /// This will create a series of `BField` bit array files in `directory` with the given `filename` and the
24+ /// suffixes `(0..n_secondaries).bfd`. If you set `in_memory` to true, remember to call `persist_to_disk` once it's built to
2725 /// save it.
28- /// The params are the following in the paper:
29- /// `n_hashes` -> k
30- /// `marker_width` -> v (nu)
31- /// `n_marker_bits` -> κ (kappa)
32- /// `secondary_scaledown` -> β (beta)
26+ ///
27+ /// The following parameters are required. See the [README.md](https://github.com/onecodex/rust-bfield/)
28+ /// for additional details as well as the
29+ /// [parameter selection notebook](https://github.com/onecodex/rust-bfield/blob/main/docs/notebook/calculate-parameters.ipynb)
30+ /// for helpful guidance in picking optimal parameters.
31+ /// - `size`. The primary `BField` size; subsequent `BField` sizes will be determined
32+ /// by the `secondary_scaledown` and `max_scaledown` parameters.
33+ /// - `n_hashes`. The number of hash functions _k_ to use.
34+ /// - `marker_width` or v (nu). The length of the bit-string to use for encoding each value.
35+ /// - `n_marker_bits` or κ (kappa). The number of 1s to set in each v-length bit-string (also its Hamming weight).
36+ /// - `secondary_scaledown` or β (beta). The scaling factor to use for each subsequent `BField` size.
37+ /// - `max_scaledown`. A maximum scaling factor to use for secondary `BField` sizes, since β raised to the power of
38+ /// `n_secondaries` can be impractically/needlessly small.
39+ /// - `n_secondaries`. The number of secondary `BField`s to create.
40+ /// - `in_memory`. Whether to create the `BField` in memory or on disk.
3341 #[ allow( clippy:: too_many_arguments) ]
3442 pub fn create < P > (
3543 directory : P ,
@@ -84,7 +92,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
8492 } )
8593 }
8694
87- /// Loads the bfield given the path to the "main" db path (eg the one ending with `0.bfd`).
95+ /// Loads the `BField` given the path to the primary array data file (e.g., the one ending with `0.bfd`).
8896 pub fn load < P : AsRef < Path > > ( main_db_path : P , read_only : bool ) -> Result < Self , io:: Error > {
8997 let mut members = Vec :: new ( ) ;
9098 let mut n = 0 ;
@@ -126,8 +134,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
126134 Ok ( BField { members, read_only } )
127135 }
128136
129- /// Write the current bfields to disk.
130- /// Only useful if you are creating a bfield in memory
137+ /// Write the current `BField` to disk.
138+ /// Only useful if you are creating a `BField` in memory.
131139 pub fn persist_to_disk ( self ) -> Result < Self , io:: Error > {
132140 let mut members = Vec :: with_capacity ( self . members . len ( ) ) ;
133141 for m in self . members {
@@ -139,32 +147,32 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
139147 } )
140148 }
141149
142- /// Returns (n_hashes, marker_width, n_marker_bits, Vec<size of each member>)
150+ /// Returns `(n_hashes, marker_width, n_marker_bits, Vec<size of each member>)`.
143151 pub fn build_params ( & self ) -> ( u8 , u8 , u8 , Vec < usize > ) {
144152 let ( _, n_hashes, marker_width, n_marker_bits) = self . members [ 0 ] . info ( ) ;
145153 let sizes = self . members . iter ( ) . map ( |i| i. info ( ) . 0 ) . collect ( ) ;
146154 ( n_hashes, marker_width, n_marker_bits, sizes)
147155 }
148156
149- /// Returns the params given at build time to the bfields
157+ /// Returns the params given at build time to the `BField` arrays.
150158 pub fn params ( & self ) -> & Option < T > {
151159 & self . members [ 0 ] . params . other
152160 }
153161
154- /// This doesn't actually update the file, so we can use it to e.g.
155- /// simulate params on an old legacy file that may not actually have
156- /// them set.
162+ /// ⚠️ Method for setting parameters without actually updating any files on disk. **Only useful for supporting legacy file formats
163+ /// in which these parameters are not saved.**
157164 pub fn mock_params ( & mut self , params : T ) {
158165 self . members [ 0 ] . params . other = Some ( params) ;
159166 }
160167
161- /// This allows an insert of a value into the b-field after the entire
162- /// b-field build process has been completed.
163- ///
164- /// It has the very bad downside of potentially knocking other keys out
165- /// of the b-field by making them indeterminate (which will make them fall
166- /// back to the secondaries where they don't exist and thus it'll appear
167- /// as if they were never inserted to begin with)
168+ /// ⚠️ Method for inserting a value into a `BField`
169+ /// after it has been fully built and finalized.
170+ /// **This method should be used with extreme care**
171+ /// as it does not guarantee that keys are properly propagated
172+ /// to secondary arrays and therefore may make lookups of previously
173+ /// set values return an indeterminate result in the primary array,
174+ /// then causing fallback to the secondary arrays where they were never
175+ /// inserted (and returning a false negative).
168176 pub fn force_insert ( & self , key : & [ u8 ] , value : BFieldVal ) {
169177 debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
170178 for secondary in & self . members {
@@ -174,8 +182,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
174182 }
175183 }
176184
177- /// Insert the given key/value at the given pass
178- /// Returns whether the value was inserted during this call, eg will return `false` if
185+ /// Insert the given key/value at the given pass (1-indexed `BField` array/member).
186+ /// Returns whether the value was inserted during this call, i.e., will return `false` if
179187 /// the value was already present.
180188 pub fn insert ( & self , key : & [ u8 ] , value : BFieldVal , pass : usize ) -> bool {
181189 debug_assert ! ( !self . read_only, "Can't insert into read_only bfields" ) ;
@@ -195,8 +203,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
195203 true
196204 }
197205
198- /// Returns the value of the given key if found, None otherwise.
199- /// If the value is indeterminate, we still return None .
206+ /// Returns the value of the given key if found, `None` otherwise.
207+ /// The current implementation also returns `None` for indeterminate values.
200208 pub fn get ( & self , key : & [ u8 ] ) -> Option < BFieldVal > {
201209 for secondary in self . members . iter ( ) {
202210 match secondary. get ( key) {
@@ -210,8 +218,8 @@ impl<T: Clone + DeserializeOwned + Serialize> BField<T> {
210218 None
211219 }
212220
213- /// Get the info of each member
214- /// Returns Vec<(size, n_hashes, marker_width, n_marker_bits)>
221+ /// Get the info of each member array (`BFieldMember`) in the `BField`, primary first.
222+ /// Returns `Vec<(size, n_hashes, marker_width, n_marker_bits)>`.
215223 pub fn info ( & self ) -> Vec < ( usize , u8 , u8 , u8 ) > {
216224 self . members . iter ( ) . map ( |m| m. info ( ) ) . collect ( )
217225 }
0 commit comments