|
| 1 | +use crate::{ |
| 2 | + blob_tree::handle::BlobIndirection, |
| 3 | + file::BLOBS_FOLDER, |
| 4 | + table::Table, |
| 5 | + tree::ingest::Ingestion as TableIngestion, |
| 6 | + vlog::{BlobFileWriter, ValueHandle}, |
| 7 | + SeqNo, UserKey, UserValue, |
| 8 | +}; |
| 9 | + |
| 10 | +/// Bulk ingestion for BlobTree |
| 11 | +/// |
| 12 | +/// Items NEED to be added in ascending key order. |
| 13 | +pub struct BlobIngestion<'a> { |
| 14 | + tree: &'a crate::BlobTree, |
| 15 | + pub(crate) table: TableIngestion<'a>, |
| 16 | + pub(crate) blob: BlobFileWriter, |
| 17 | + seqno: SeqNo, |
| 18 | + separation_threshold: u32, |
| 19 | + last_key: Option<UserKey>, |
| 20 | +} |
| 21 | + |
| 22 | +impl<'a> BlobIngestion<'a> { |
| 23 | + /// Creates a new ingestion. |
| 24 | + /// |
| 25 | + /// # Errors |
| 26 | + /// |
| 27 | + /// Will return `Err` if an IO error occurs. |
| 28 | + pub fn new(tree: &'a crate::BlobTree) -> crate::Result<Self> { |
| 29 | + let kv = tree |
| 30 | + .index |
| 31 | + .config |
| 32 | + .kv_separation_opts |
| 33 | + .as_ref() |
| 34 | + .expect("kv separation options should exist"); |
| 35 | + |
| 36 | + let blob_file_size = kv.file_target_size; |
| 37 | + |
| 38 | + let table = TableIngestion::new(&tree.index)?; |
| 39 | + let blob = BlobFileWriter::new( |
| 40 | + tree.index.0.blob_file_id_counter.clone(), |
| 41 | + blob_file_size, |
| 42 | + tree.index.config.path.join(BLOBS_FOLDER), |
| 43 | + )? |
| 44 | + .use_compression(kv.compression); |
| 45 | + |
| 46 | + let separation_threshold = kv.separation_threshold; |
| 47 | + |
| 48 | + Ok(Self { |
| 49 | + tree, |
| 50 | + table, |
| 51 | + blob, |
| 52 | + seqno: 0, |
| 53 | + separation_threshold, |
| 54 | + last_key: None, |
| 55 | + }) |
| 56 | + } |
| 57 | + |
| 58 | + /// Sets the ingestion seqno. |
| 59 | + #[must_use] |
| 60 | + pub fn with_seqno(mut self, seqno: SeqNo) -> Self { |
| 61 | + self.seqno = seqno; |
| 62 | + self.table = self.table.with_seqno(seqno); |
| 63 | + self |
| 64 | + } |
| 65 | + |
| 66 | + /// Writes a key-value pair. |
| 67 | + /// |
| 68 | + /// # Errors |
| 69 | + /// |
| 70 | + /// Will return `Err` if an IO error occurs. |
| 71 | + pub fn write(&mut self, key: UserKey, value: UserValue) -> crate::Result<()> { |
| 72 | + // Check order before any blob I/O to avoid partial writes on failure |
| 73 | + if let Some(prev) = &self.last_key { |
| 74 | + assert!( |
| 75 | + key > *prev, |
| 76 | + "next key in ingestion must be greater than last key" |
| 77 | + ); |
| 78 | + } |
| 79 | + |
| 80 | + #[allow(clippy::cast_possible_truncation)] |
| 81 | + let value_size = value.len() as u32; |
| 82 | + |
| 83 | + if value_size >= self.separation_threshold { |
| 84 | + let offset = self.blob.offset(); |
| 85 | + let blob_file_id = self.blob.blob_file_id(); |
| 86 | + let on_disk_size = self.blob.write(&key, self.seqno, &value)?; |
| 87 | + |
| 88 | + let indirection = BlobIndirection { |
| 89 | + vhandle: ValueHandle { |
| 90 | + blob_file_id, |
| 91 | + offset, |
| 92 | + on_disk_size, |
| 93 | + }, |
| 94 | + size: value_size, |
| 95 | + }; |
| 96 | + |
| 97 | + let cloned_key = key.clone(); |
| 98 | + let res = self.table.write_indirection(key, indirection); |
| 99 | + if res.is_ok() { |
| 100 | + self.last_key = Some(cloned_key); |
| 101 | + } |
| 102 | + res |
| 103 | + } else { |
| 104 | + let cloned_key = key.clone(); |
| 105 | + let res = self.table.write(key, value); |
| 106 | + if res.is_ok() { |
| 107 | + self.last_key = Some(cloned_key); |
| 108 | + } |
| 109 | + res |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + /// Writes a tombstone for a key. |
| 114 | + /// |
| 115 | + /// # Errors |
| 116 | + /// |
| 117 | + /// Will return `Err` if an IO error occurs. |
| 118 | + pub fn write_tombstone(&mut self, key: UserKey) -> crate::Result<()> { |
| 119 | + if let Some(prev) = &self.last_key { |
| 120 | + assert!( |
| 121 | + key > *prev, |
| 122 | + "next key in ingestion must be greater than last key" |
| 123 | + ); |
| 124 | + } |
| 125 | + |
| 126 | + let cloned_key = key.clone(); |
| 127 | + let res = self.table.write_tombstone(key); |
| 128 | + if res.is_ok() { |
| 129 | + self.last_key = Some(cloned_key); |
| 130 | + } |
| 131 | + res |
| 132 | + } |
| 133 | + |
| 134 | + /// Finishes the ingestion. |
| 135 | + /// |
| 136 | + /// # Errors |
| 137 | + /// |
| 138 | + /// Will return `Err` if an IO error occurs. |
| 139 | + pub fn finish(self) -> crate::Result<()> { |
| 140 | + use crate::AbstractTree; |
| 141 | + |
| 142 | + // Capture required handles before consuming fields during finalization |
| 143 | + let index = self.index().clone(); |
| 144 | + let tree = self.tree.clone(); |
| 145 | + |
| 146 | + let blob_files = self.blob.finish()?; |
| 147 | + let results = self.table.writer.finish()?; |
| 148 | + |
| 149 | + let pin_filter = index.config.filter_block_pinning_policy.get(0); |
| 150 | + let pin_index = index.config.index_block_pinning_policy.get(0); |
| 151 | + |
| 152 | + let created_tables = results |
| 153 | + .into_iter() |
| 154 | + .map(|(table_id, checksum)| -> crate::Result<Table> { |
| 155 | + Table::recover( |
| 156 | + index |
| 157 | + .config |
| 158 | + .path |
| 159 | + .join(crate::file::TABLES_FOLDER) |
| 160 | + .join(table_id.to_string()), |
| 161 | + checksum, |
| 162 | + index.id, |
| 163 | + index.config.cache.clone(), |
| 164 | + index.config.descriptor_table.clone(), |
| 165 | + pin_filter, |
| 166 | + pin_index, |
| 167 | + #[cfg(feature = "metrics")] |
| 168 | + index.metrics.clone(), |
| 169 | + ) |
| 170 | + }) |
| 171 | + .collect::<crate::Result<Vec<_>>>()?; |
| 172 | + |
| 173 | + tree.register_tables(&created_tables, Some(&blob_files), None, &[], 0)?; |
| 174 | + |
| 175 | + Ok(()) |
| 176 | + } |
| 177 | + |
| 178 | + #[inline] |
| 179 | + fn index(&self) -> &crate::Tree { |
| 180 | + &self.tree.index |
| 181 | + } |
| 182 | +} |
0 commit comments