@@ -81,39 +81,54 @@ impl AbstractTree for Tree {
8181
8282 #[ expect( clippy:: significant_drop_tightening) ]
8383 fn get_internal_entry ( & self , key : & [ u8 ] , seqno : SeqNo ) -> crate :: Result < Option < InternalValue > > {
84+ // Returns the newest visible version (`entry.seqno < seqno`) across all sources
85+ // for the given snapshot watermark `seqno`:
86+ // - Active memtable
87+ // - Sealed memtables (newest-first)
88+ // - Tables (newest-first)
89+ // Ingestion can write newer versions directly to tables with an explicit `seqno`,
90+ // so we compare seqnos across sources and keep the maximal visible version, then
91+ // apply tombstone semantics once to that winner.
92+
8493 let version_history_lock = self . version_history . read ( ) . expect ( "lock is poisoned" ) ;
8594 let super_version = version_history_lock. get_version_for_snapshot ( seqno) ;
95+ // Avoid holding the read lock across potential I/O in table lookups
96+ drop ( version_history_lock) ;
8697
87- // Track best candidate by seqno across all sources
98+ // 1) Active memtable (in-memory)
99+ // If it yields the maximal visible seqno (seqno-1), no source can be newer.
88100 let mut best: Option < InternalValue > = super_version. active_memtable . get ( key, seqno) ;
89- // If the memtable hit matches the snapshot seqno, it's the maximal visible version
90- if let Some ( ref entry) = best {
91- if entry. key . seqno == seqno {
92- return Ok ( ignore_tombstone_value ( entry. clone ( ) ) ) ;
101+ if let Some ( ref b) = best {
102+ if seqno > 0 && b. key . seqno + 1 == seqno {
103+ return Ok ( ignore_tombstone_value ( b. clone ( ) ) ) ;
93104 }
94105 }
95106
96- // Sealed memtables are older than active; only consult them on miss
107+ // 2) Sealed memtables (in-memory, newest-first)
108+ // If active memtable already has a hit, sealed memtables cannot be newer.
97109 if best. is_none ( ) {
98110 if let Some ( entry) =
99111 Self :: get_internal_entry_from_sealed_memtables ( & super_version, key, seqno)
100112 {
113+ // Early-exit if sealed provided the maximal visible seqno.
114+ if seqno > 0 && entry. key . seqno + 1 == seqno {
115+ return Ok ( ignore_tombstone_value ( entry) ) ;
116+ }
101117 best = Some ( entry) ;
102118 }
103119 }
104120
121+ // 3) Tables (on-disk), scanned newest-first within each level/run
105122 if let Some ( entry) =
106123 self . get_internal_entry_from_tables ( & super_version. version , key, seqno) ?
107124 {
108- if let Some ( b) = best {
109- if b. key . seqno >= entry. key . seqno {
110- return Ok ( ignore_tombstone_value ( b) ) ;
111- }
125+ match & best {
126+ Some ( b) if b. key . seqno >= entry. key . seqno => { }
127+ _ => best = Some ( entry) ,
112128 }
113- return Ok ( ignore_tombstone_value ( entry) ) ;
114129 }
115130
116- // Apply tombstone semantics after selecting the newest version
131+ // Apply tombstone semantics after selecting the newest visible version
117132 Ok ( best. and_then ( ignore_tombstone_value) )
118133 }
119134
0 commit comments