@@ -61,6 +61,7 @@ export interface MongoCompactOptions extends storage.CompactOptions {}
6161const DEFAULT_CLEAR_BATCH_LIMIT = 5000 ;
6262const DEFAULT_MOVE_BATCH_LIMIT = 2000 ;
6363const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000 ;
64+ const DEFAULT_MIN_BUCKET_CHANGES = 10 ;
6465
6566/** This default is primarily for tests. */
6667const DEFAULT_MEMORY_LIMIT_MB = 64 ;
@@ -73,6 +74,7 @@ export class MongoCompactor {
7374 private moveBatchLimit : number ;
7475 private moveBatchQueryLimit : number ;
7576 private clearBatchLimit : number ;
77+ private minBucketChanges : number ;
7678 private maxOpId : bigint ;
7779 private buckets : string [ ] | undefined ;
7880 private signal ?: AbortSignal ;
@@ -88,6 +90,7 @@ export class MongoCompactor {
8890 this . moveBatchLimit = options ?. moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT ;
8991 this . moveBatchQueryLimit = options ?. moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT ;
9092 this . clearBatchLimit = options ?. clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT ;
93+ this . minBucketChanges = options ?. minBucketChanges ?? DEFAULT_MIN_BUCKET_CHANGES ;
9194 this . maxOpId = options ?. maxOpId ?? 0n ;
9295 this . buckets = options ?. compactBuckets ;
9396 this . signal = options ?. signal ;
@@ -113,14 +116,26 @@ export class MongoCompactor {
113116
/**
 * Compacts dirty buckets in batches until none remain or `this.signal` is aborted.
 *
 * A bucket is eligible when it has at least `this.minBucketChanges` changes since it
 * was last compacted. Each batch is fetched via `dirtyBucketBatch` and every bucket in
 * it is compacted sequentially with `compactSingleBucket`.
 *
 * The most recently compacted buckets are excluded from subsequent batches, to avoid
 * repeatedly re-compacting the same buckets over and over if they are modified while
 * compacting. The loop therefore ends once no eligible bucket outside that
 * recently-compacted window remains.
 */
private async compactDirtyBuckets() {
  // Upper bound on how many recently compacted bucket names are remembered.
  const TRACK_RECENTLY_COMPACTED_NUMBER = 100;

  // This must live outside the loop: re-declaring it per iteration would reset it to
  // an empty list before every query, so nothing would ever be excluded.
  let recentlyCompacted: string[] = [];

  while (!this.signal?.aborted) {
    const buckets = await this.dirtyBucketBatch({
      minBucketChanges: this.minBucketChanges,
      exclude: recentlyCompacted
    });
    if (buckets.length === 0) {
      // All done
      break;
    }
    for (const { bucket } of buckets) {
      await this.compactSingleBucket(bucket);
      recentlyCompacted.push(bucket);
    }
    // Keep only the newest entries so the exclusion list stays bounded.
    if (recentlyCompacted.length > TRACK_RECENTLY_COMPACTED_NUMBER) {
      recentlyCompacted = recentlyCompacted.slice(-TRACK_RECENTLY_COMPACTED_NUMBER);
    }
  }
}
@@ -482,10 +497,20 @@ export class MongoCompactor {
482497 break ;
483498 }
484499 const start = Date . now ( ) ;
485- logger . info ( `Calculating checksums for batch of ${ buckets . length } buckets, starting at ${ buckets [ 0 ] } ` ) ;
500+ logger . info ( `Calculating checksums for batch of ${ buckets . length } buckets` ) ;
486501
487- await this . updateChecksumsBatch ( buckets ) ;
488- logger . info ( `Updated checksums for batch of ${ buckets . length } buckets in ${ Date . now ( ) - start } ms` ) ;
502+ // Filter batch by estimated bucket size, to reduce possibility of timeouts
503+ let checkBuckets : typeof buckets = [ ] ;
504+ let totalCountEstimate = 0 ;
505+ for ( let bucket of buckets ) {
506+ checkBuckets . push ( bucket ) ;
507+ totalCountEstimate += bucket . estimatedCount ;
508+ if ( totalCountEstimate > 50_000 ) {
509+ break ;
510+ }
511+ }
512+ await this . updateChecksumsBatch ( checkBuckets . map ( ( b ) => b . bucket ) ) ;
513+ logger . info ( `Updated checksums for batch of ${ checkBuckets . length } buckets in ${ Date . now ( ) - start } ms` ) ;
489514 count += buckets . length ;
490515 }
491516 return { buckets : count } ;
@@ -497,7 +522,10 @@ export class MongoCompactor {
497522 * This cannot be used to iterate on its own - the client is expected to process these buckets and
498523 * set estimate_since_compact.count: 0 when done, before fetching the next batch.
499524 */
500- private async dirtyBucketBatch ( options : { minBucketChanges : number } ) : Promise < string [ ] > {
525+ private async dirtyBucketBatch ( options : {
526+ minBucketChanges : number ;
527+ exclude ?: string [ ] ;
528+ } ) : Promise < { bucket : string ; estimatedCount : number } [ ] > {
501529 if ( options . minBucketChanges <= 0 ) {
502530 throw new ReplicationAssertionError ( 'minBucketChanges must be >= 1' ) ;
503531 }
@@ -506,22 +534,28 @@ export class MongoCompactor {
506534 . find (
507535 {
508536 '_id.g' : this . group_id ,
509- 'estimate_since_compact.count' : { $gte : options . minBucketChanges }
537+ 'estimate_since_compact.count' : { $gte : options . minBucketChanges } ,
538+ '_id.b' : { $nin : options . exclude ?? [ ] }
510539 } ,
511540 {
512541 projection : {
513- _id : 1
542+ _id : 1 ,
543+ estimate_since_compact : 1 ,
544+ compacted_state : 1
514545 } ,
515546 sort : {
516547 'estimate_since_compact.count' : - 1
517548 } ,
518- limit : 5_000 ,
549+ limit : 200 ,
519550 maxTimeMS : MONGO_OPERATION_TIMEOUT_MS
520551 }
521552 )
522553 . toArray ( ) ;
523554
524- return dirtyBuckets . map ( ( bucket ) => bucket . _id . b ) ;
555+ return dirtyBuckets . map ( ( bucket ) => ( {
556+ bucket : bucket . _id . b ,
557+ estimatedCount : bucket . estimate_since_compact ! . count + ( bucket . compacted_state ?. count ?? 0 )
558+ } ) ) ;
525559 }
526560
527561 private async updateChecksumsBatch ( buckets : string [ ] ) {
0 commit comments