@@ -18,6 +18,7 @@ import (
1818 "context"
1919 "errors"
2020 "fmt"
21+ "time"
2122
2223 "github.com/conduitio/conduit-commons/opencdc"
2324 "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal"
@@ -35,15 +36,28 @@ type CDCConfig struct {
3536 Tables []string
3637 TableKeys map [string ]string
3738 WithAvroSchema bool
39+ // BatchSize is the maximum size of a batch that will be read from the DB
40+ // in one go and processed by the CDCHandler.
41+ BatchSize int
3842}
3943
4044// CDCIterator asynchronously listens for events from the logical replication
4145// slot and returns them to the caller through NextN.
4246type CDCIterator struct {
43- config CDCConfig
44- records chan opencdc. Record
47+ config CDCConfig
48+ sub * internal. Subscription
4549
46- sub * internal.Subscription
50+ // batchesCh is a channel shared between this iterator and a CDCHandler,
51+ // to which the CDCHandler is sending batches of records.
52+ // Using a shared queue here would be the fastest option. However,
53+ // we also need to watch for a context that can get cancelled,
54+ // and for the subscription that can end, so using a channel is
55+ // the best option at the moment.
56+ batchesCh chan []opencdc.Record
57+
58+ // recordsForNextRead contains records from the previous batch (returned by the CDCHandler),
59+ // that weren't return by this iterator's ReadN method.
60+ recordsForNextRead []opencdc.Record
4761}
4862
4963// NewCDCIterator initializes logical replication by creating the publication and subscription manager.
@@ -64,8 +78,22 @@ func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCI
6478 Msgf ("Publication %q already exists." , c .PublicationName )
6579 }
6680
67- records := make (chan opencdc.Record )
68- handler := NewCDCHandler (internal .NewRelationSet (), c .TableKeys , records , c .WithAvroSchema )
81+ // Using a buffered channel here so that the handler can send a batch
82+ // to the channel and start building a new batch.
83+ // This is useful when the first batch in the channel didn't reach BatchSize (which is sdk.batch.size).
84+ // The handler can prepare the next batch, and the CDCIterator can use them
85+ // to return the maximum number of records.
86+ batchesCh := make (chan []opencdc.Record , 1 )
87+ handler := NewCDCHandler (
88+ ctx ,
89+ internal .NewRelationSet (),
90+ c .TableKeys ,
91+ batchesCh ,
92+ c .WithAvroSchema ,
93+ c .BatchSize ,
94+ // todo make configurable
95+ time .Second ,
96+ )
6997
7098 sub , err := internal .CreateSubscription (
7199 ctx ,
@@ -81,9 +109,9 @@ func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCI
81109 }
82110
83111 return & CDCIterator {
84- config : c ,
85- records : records ,
86- sub : sub ,
112+ config : c ,
113+ batchesCh : batchesCh ,
114+ sub : sub ,
87115 }, nil
88116}
89117
@@ -113,8 +141,9 @@ func (i *CDCIterator) StartSubscriber(ctx context.Context) error {
113141 return nil
114142}
115143
116- // NextN takes and returns up to n records from the queue. NextN is allowed to
117- // block until either at least one record is available or the context gets canceled.
144+ // NextN returns up to n records from the internal channel with records.
145+ // NextN is allowed to block until either at least one record is available
146+ // or the context gets canceled.
118147func (i * CDCIterator ) NextN (ctx context.Context , n int ) ([]opencdc.Record , error ) {
119148 if ! i .subscriberReady () {
120149 return nil , errors .New ("logical replication has not been started" )
@@ -124,9 +153,45 @@ func (i *CDCIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error
124153 return nil , fmt .Errorf ("n must be greater than 0, got %d" , n )
125154 }
126155
127- var recs []opencdc.Record
156+ // First, we check if there are any records from the previous batch
157+ // that we can start with.
158+ recs := make ([]opencdc.Record , len (i .recordsForNextRead ), n )
159+ copy (recs , i .recordsForNextRead )
160+ i .recordsForNextRead = nil
161+
162+ // NextN needs to wait until at least 1 record is available.
163+ if len (recs ) == 0 {
164+ batch , err := i .nextRecordsBatchBlocking (ctx )
165+ if err != nil {
166+ return nil , fmt .Errorf ("failed to fetch next batch of records (blocking): %w" , err )
167+ }
168+ recs = batch
169+ }
170+
171+ // We add any already available batches (i.e., we're not blocking waiting for any new batches to arrive)
172+ // to return at most n records.
173+ for len (recs ) < n {
174+ batch , err := i .nextRecordsBatch (ctx )
175+ if err != nil {
176+ return nil , fmt .Errorf ("failed to fetch next batch of records: %w" , err )
177+ }
178+ if batch == nil {
179+ break
180+ }
181+ recs = i .appendRecordsWithLimit (recs , batch , n )
182+ }
183+
184+ sdk .Logger (ctx ).Trace ().
185+ Int ("records" , len (recs )).
186+ Int ("records_for_next_read" , len (i .recordsForNextRead )).
187+ Msg ("CDCIterator.NextN returning records" )
188+ return recs , nil
189+ }
128190
129- // Block until at least one record is received or context is canceled
191+ // nextRecordsBatchBlocking waits for the next batch of records to arrive,
192+ // or for the context to be done, or for the subscription to be done,
193+ // whichever comes first.
194+ func (i * CDCIterator ) nextRecordsBatchBlocking (ctx context.Context ) ([]opencdc.Record , error ) {
130195 select {
131196 case <- ctx .Done ():
132197 return nil , ctx .Err ()
@@ -142,33 +207,59 @@ func (i *CDCIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error
142207 // subscription stopped without an error and the context is still
143208 // open, this is a strange case, shouldn't actually happen
144209 return nil , fmt .Errorf ("subscription stopped, no more data to fetch (this smells like a bug)" )
145- case rec := <- i .records :
146- recs = append (recs , rec )
210+ case batch := <- i .batchesCh :
211+ sdk .Logger (ctx ).Trace ().
212+ Int ("records" , len (batch )).
213+ Msg ("CDCIterator.NextN received batch of records (blocking)" )
214+ return batch , nil
147215 }
216+ }
148217
149- for len (recs ) < n {
150- select {
151- case rec := <- i .records :
152- recs = append (recs , rec )
153- case <- ctx .Done ():
154- return nil , ctx .Err ()
155- case <- i .sub .Done ():
156- if err := i .sub .Err (); err != nil {
157- return recs , fmt .Errorf ("logical replication error: %w" , err )
158- }
159- if err := ctx .Err (); err != nil {
160- // Return what we have with context error
161- return recs , err
162- }
163- // Return what we have with subscription stopped error
164- return recs , fmt .Errorf ("subscription stopped, no more data to fetch (this smells like a bug)" )
165- default :
166- // No more records currently available
167- return recs , nil
218+ func (i * CDCIterator ) nextRecordsBatch (ctx context.Context ) ([]opencdc.Record , error ) {
219+ select {
220+ case <- ctx .Done ():
221+ // Return what we have with the error
222+ return nil , ctx .Err ()
223+ case <- i .sub .Done ():
224+ if err := i .sub .Err (); err != nil {
225+ return nil , fmt .Errorf ("logical replication error: %w" , err )
168226 }
227+ if err := ctx .Err (); err != nil {
228+ // Return what we have with the context error
229+ return nil , err
230+ }
231+ // Return what we have with subscription stopped error
232+ return nil , fmt .Errorf ("subscription stopped, no more data to fetch (this smells like a bug)" )
233+ case batch := <- i .batchesCh :
234+ sdk .Logger (ctx ).Trace ().
235+ Int ("records" , len (batch )).
236+ Msg ("CDCIterator.NextN received batch of records" )
237+
238+ return batch , nil
239+ default :
240+ // No more records currently available
241+ return nil , nil
169242 }
243+ }
170244
171- return recs , nil
245+ // appendRecordsWithLimit appends records to dst from src, until the given limit is reached,
246+ // or all records from src have been moved.
247+ // If some records from src are not moved (probably because they lack emotions),
248+ // they are saved to recordsForNextRead.
249+ func (i * CDCIterator ) appendRecordsWithLimit (dst []opencdc.Record , src []opencdc.Record , limit int ) []opencdc.Record {
250+ if len (src ) == 0 || len (dst ) > limit {
251+ return src
252+ }
253+
254+ needed := limit - len (dst )
255+ if needed > len (src ) {
256+ needed = len (src )
257+ }
258+
259+ dst = append (dst , src [:needed ]... )
260+ i .recordsForNextRead = src [needed :]
261+
262+ return dst
172263}
173264
174265// Ack forwards the acknowledgment to the subscription.
0 commit comments