Skip to content

Commit 0abd2a4

Browse files
committed
improve version selection logic and comments in get_internal_entry
1 parent 044b8a1 commit 0abd2a4

File tree

1 file changed

+27
-12
lines changed

1 file changed

+27
-12
lines changed

src/tree/mod.rs

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,39 +81,54 @@ impl AbstractTree for Tree {
8181

8282
#[expect(clippy::significant_drop_tightening)]
8383
fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>> {
84+
// Returns the newest visible version (`entry.seqno < seqno`) across all sources
85+
// for the given snapshot watermark `seqno`:
86+
// - Active memtable
87+
// - Sealed memtables (newest-first)
88+
// - Tables (newest-first)
89+
// Ingestion can write newer versions directly to tables with an explicit `seqno`,
90+
// so we compare seqnos across sources and keep the maximal visible version, then
91+
// apply tombstone semantics once to that winner.
92+
8493
let version_history_lock = self.version_history.read().expect("lock is poisoned");
8594
let super_version = version_history_lock.get_version_for_snapshot(seqno);
95+
// Avoid holding the read lock across potential I/O in table lookups
96+
drop(version_history_lock);
8697

87-
// Track best candidate by seqno across all sources
98+
// 1) Active memtable (in-memory)
99+
// If it yields the maximal visible seqno (seqno-1), no source can be newer.
88100
let mut best: Option<InternalValue> = super_version.active_memtable.get(key, seqno);
89-
// If the memtable hit matches the snapshot seqno, it's the maximal visible version
90-
if let Some(ref entry) = best {
91-
if entry.key.seqno == seqno {
92-
return Ok(ignore_tombstone_value(entry.clone()));
101+
if let Some(ref b) = best {
102+
if seqno > 0 && b.key.seqno + 1 == seqno {
103+
return Ok(ignore_tombstone_value(b.clone()));
93104
}
94105
}
95106

96-
// Sealed memtables are older than active; only consult them on miss
107+
// 2) Sealed memtables (in-memory, newest-first)
108+
// If active memtable already has a hit, sealed memtables cannot be newer.
97109
if best.is_none() {
98110
if let Some(entry) =
99111
Self::get_internal_entry_from_sealed_memtables(&super_version, key, seqno)
100112
{
113+
// Early-exit if sealed provided the maximal visible seqno.
114+
if seqno > 0 && entry.key.seqno + 1 == seqno {
115+
return Ok(ignore_tombstone_value(entry));
116+
}
101117
best = Some(entry);
102118
}
103119
}
104120

121+
// 3) Tables (on-disk), scanned newest-first within each level/run
105122
if let Some(entry) =
106123
self.get_internal_entry_from_tables(&super_version.version, key, seqno)?
107124
{
108-
if let Some(b) = best {
109-
if b.key.seqno >= entry.key.seqno {
110-
return Ok(ignore_tombstone_value(b));
111-
}
125+
match &best {
126+
Some(b) if b.key.seqno >= entry.key.seqno => {}
127+
_ => best = Some(entry),
112128
}
113-
return Ok(ignore_tombstone_value(entry));
114129
}
115130

116-
// Apply tombstone semantics after selecting the newest version
131+
// Apply tombstone semantics after selecting the newest visible version
117132
Ok(best.and_then(ignore_tombstone_value))
118133
}
119134

0 commit comments

Comments
 (0)