Skip to content

Commit 19fd66b

Browse files
author
ffffwh
committed
add logs for debugging bigtx stuck
1 parent 31c27f4 commit 19fd66b

File tree

5 files changed

+72
-31
lines changed

5 files changed

+72
-31
lines changed

driver/common/binlog.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type EntryContext struct {
4848
// Only a DML has a tableItem. For a DDL, its tableItem is nil.
4949
TableItems []*ApplierTableItem
5050
OriginalSize int // size of binlog entry
51+
Rows int // for logging
5152
}
5253

5354
func NewBinlogEntry() *DataEntry {

driver/mysql/applier_incr.go

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ func (a *ApplierIncr) bigTxQueueExecutor() {
200200
for {
201201
item := <-a.bigTxEventQueue
202202
if item == nil {
203+
// chan closed in Shutdown()
203204
break
204205
}
205206

@@ -255,10 +256,11 @@ func (a *ApplierIncr) MtsWorker(workerIndex int) {
255256

256257
func (a *ApplierIncr) handleEntry(entryCtx *common.EntryContext) (err error) {
257258
binlogEntry := entryCtx.Entry
259+
isBig := binlogEntry.IsPartOfBigTx()
258260
txGno := binlogEntry.Coordinates.GetGNO()
259261

260262
if a.inBigTx && binlogEntry.Index == 0 {
261-
a.logger.Info("found resent BinlogEntry inBigTx", "gno", txGno)
263+
a.logger.Info("bigtx: found resent BinlogEntry", "gno", txGno)
262264
// src is resending an earlier BinlogEntry
263265
_, err = a.dbs[0].Db.ExecContext(a.ctx, "rollback")
264266
if err != nil {
@@ -328,6 +330,9 @@ func (a *ApplierIncr) handleEntry(entryCtx *common.EntryContext) (err error) {
328330
gtidSetItem.NRow += 1
329331
if binlogEntry.Coordinates.GetSequenceNumber() == 0 {
330332
// MySQL 5.6: non mts
333+
if isBig {
334+
a.inBigTx = true
335+
}
331336
err := a.setTableItemForBinlogEntry(entryCtx)
332337
if err != nil {
333338
return err
@@ -365,21 +370,21 @@ func (a *ApplierIncr) handleEntry(entryCtx *common.EntryContext) (err error) {
365370
}
366371

367372
hasDDL := binlogEntry.HasDDL()
368-
// DDL must be executed separatedly
369-
if hasDDL || a.prevDDL {
370-
a.logger.Debug("MTS found DDL. WaitForAllCommitted",
371-
"gno", txGno, "hasDDL", hasDDL, "prevDDL", a.prevDDL)
373+
inMiddleDDL := hasDDL || a.prevDDL // DDL must be executed separatedly
374+
if inMiddleDDL || isBig {
375+
a.logger.Info("WaitForAllCommitted",
376+
"gno", txGno, "seq", binlogEntry.Coordinates.GetSequenceNumber(),
377+
"lc", binlogEntry.Coordinates.GetLastCommit(), "leq", a.mtsManager.lastEnqueue,
378+
"hasDDL", hasDDL, "prevDDL", a.prevDDL,
379+
"bigtx", isBig, "index", binlogEntry.Index)
372380
if !a.mtsManager.WaitForAllCommitted() {
373381
return nil // shutdown
374382
}
375383
}
376384
a.prevDDL = hasDDL
377385

378-
if binlogEntry.IsPartOfBigTx() {
386+
if isBig {
379387
a.inBigTx = true
380-
if !a.mtsManager.WaitForAllCommitted() {
381-
return nil // shutdown
382-
}
383388
a.wsManager.resetCommonParent(binlogEntry.Coordinates.GetSequenceNumber())
384389
}
385390
}
@@ -389,17 +394,18 @@ func (a *ApplierIncr) handleEntry(entryCtx *common.EntryContext) (err error) {
389394
return err
390395
}
391396

392-
if !binlogEntry.IsPartOfBigTx() && !a.mysqlContext.UseMySQLDependency {
397+
if !isBig && !a.mysqlContext.UseMySQLDependency {
393398
newLC := a.wsManager.GatLastCommit(entryCtx, a.logger)
394399
binlogEntry.Coordinates.(*common.MySQLCoordinateTx).LastCommitted = newLC
395400
a.logger.Debug("WritesetManager", "lc", newLC, "seq", binlogEntry.Coordinates.GetSequenceNumber(),
396401
"gno", txGno)
397402
}
398403

399-
if binlogEntry.IsPartOfBigTx() {
404+
if isBig {
400405
if binlogEntry.Index == 0 {
401406
a.mtsManager.lastEnqueue = binlogEntry.Coordinates.GetSequenceNumber()
402407
}
408+
a.logger.Info("bigtx ApplyBinlogEvent", "gno", txGno, "index", binlogEntry.Index)
403409
err = a.ApplyBinlogEvent(0, entryCtx)
404410
if err != nil {
405411
return err
@@ -521,6 +527,7 @@ func (a *ApplierIncr) prepareIfNilAndExecute(item *dmlExecItem, workerIdx int) (
521527
// ApplyEventQueries applies multiple DML queries onto the dest table
522528
func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.EntryContext) (err error) {
523529
logger := a.logger.Named("ApplyBinlogEvent")
530+
binlogEntryCtx.Rows = 0 // count for logging
524531
binlogEntry := binlogEntryCtx.Entry
525532
defer atomic.AddInt64(a.memory2, -int64(binlogEntry.Size()))
526533

@@ -535,7 +542,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
535542
// Note: gtid_next cannot be set when there is an ongoing transaction.
536543
if a.mysqlContext.SetGtidNext {
537544
_, err = dbApplier.Db.ExecContext(a.ctx, fmt.Sprintf("set gtid_next = '%v:%v' /*dtle*/",
538-
binlogEntry.Coordinates.GetSidStr(), binlogEntry.Coordinates.GetGNO()))
545+
binlogEntry.Coordinates.GetSidStr(), gno))
539546
if err != nil {
540547
return errors.Wrap(err, "set gtid_next")
541548
}
@@ -563,7 +570,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
563570
_, err = dbApplier.Db.ExecContext(a.ctx, query)
564571
if err != nil {
565572
errCtx := errors.Wrapf(err, "tx.Exec. gno %v queryBegin %v workerIdx %v",
566-
binlogEntry.Coordinates.GetGNO(), g.StrLim(query, 10), workerIdx)
573+
gno, g.StrLim(query, 10), workerIdx)
567574
if sql.IgnoreError(err) {
568575
logger.Warn("Ignore error", "err", errCtx)
569576
return nil
@@ -577,7 +584,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
577584

578585
queueOrExec := func(item *dmlExecItem) error {
579586
// TODO check if shutdown?
580-
if !a.noBigTxDMLPipe && a.inBigTx {
587+
if a.inBigTx && !a.noBigTxDMLPipe {
581588
a.bigTxEventWg.Add(1)
582589
select {
583590
case <-a.shutdownCh:
@@ -594,7 +601,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
594601
if a.HasShutdown() {
595602
break
596603
}
597-
logger.Debug("binlogEntry.Events", "gno", binlogEntry.Coordinates.GetGNO(), "event", i)
604+
logger.Debug("binlogEntry.Events", "gno", gno, "event", i)
598605

599606
if event.DML == common.NotDML {
600607
var err error
@@ -683,6 +690,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
683690
switch event.DML {
684691
case common.InsertDML:
685692
nRows := len(event.Rows)
693+
binlogEntryCtx.Rows += nRows
686694
for i := 0; i < nRows; {
687695
var pstmt **gosql.Stmt
688696
var rows [][]interface{}
@@ -717,6 +725,7 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
717725
}
718726
}
719727
case common.DeleteDML:
728+
binlogEntryCtx.Rows += len(event.Rows)
720729
for _, row := range event.Rows {
721730
pstmt := &tableItem.PsDelete[workerIdx]
722731
query, uniqueKeyArgs, hasUK, err := sql.BuildDMLDeleteQuery(event.DatabaseName, event.TableName,
@@ -734,15 +743,16 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
734743
case common.UpdateDML:
735744
if len(event.Rows) % 2 != 0 {
736745
return fmt.Errorf("bad update event. row number is not 2N %v gno %v",
737-
len(event.Rows), binlogEntry.Coordinates.GetGNO())
746+
len(event.Rows), gno)
738747
}
748+
binlogEntryCtx.Rows += len(event.Rows) / 2
739749
for i := 0; i < len(event.Rows); i += 2 {
740750
rowBefore := event.Rows[i]
741751
rowAfter := event.Rows[i+1]
742752

743753
if len(rowBefore) == 0 && len(rowAfter) == 0 {
744754
return fmt.Errorf("bad update event. row number is not 2N %v gno %v",
745-
len(event.Rows), binlogEntry.Coordinates.GetGNO())
755+
len(event.Rows), gno)
746756
}
747757

748758
if len(rowBefore) == 0 { // insert
@@ -800,27 +810,42 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
800810
timestamp = event.Timestamp
801811
atomic.AddUint64(&a.appliedQueryCount, uint64(1))
802812
}
813+
if a.inBigTx && !a.noBigTxDMLPipe {
814+
a.logger.Info("a.bigTxEventWg.Wait before", "gno", gno, "index", binlogEntry.Index)
815+
}
803816
a.bigTxEventWg.Wait()
804817
if a.HasShutdown() {
805818
return fmt.Errorf("ApplyBinlogEvent: applier has been shutdown. gno %v", gno)
806819
}
807820

808821
if binlogEntry.Final {
822+
isBigTx := binlogEntry.IsPartOfBigTx()
809823
if !a.SkipGtidExecutedTable && a.sourceType == "mysql" {
810824
if binlogEntry.IsOneStmtDDL() && a.mysqlContext.SetGtidNext {
811825
err1 := dbApplier.SetGtidNextAutomatic(a.ctx)
812826
if err1 != nil {
813827
err = errors.Wrapf(err1, "restore gtid_next")
814828
}
815829
}
816-
logger.Debug("insert gno", "gno", binlogEntry.Coordinates.GetGNO())
830+
831+
if a.logTxCommit || isBigTx {
832+
logger.Info("insert gno", "gno", gno, "bigtx", isBigTx, "index", binlogEntry.Index,
833+
"rows", binlogEntryCtx.Rows)
834+
} else {
835+
logger.Debug("insert gno", "gno", gno, "rows", binlogEntryCtx.Rows)
836+
}
837+
817838
_, err = dbApplier.PsInsertExecutedGtid.ExecContext(a.ctx,
818-
a.subject, binlogEntry.Coordinates.GetSid().(uuid.UUID).Bytes(), binlogEntry.Coordinates.GetGNO())
839+
a.subject, binlogEntry.Coordinates.GetSid().(uuid.UUID).Bytes(), gno)
819840
if err != nil {
820841
return errors.Wrap(err, "insert gno")
821842
}
822843
}
823844

845+
if a.logTxCommit || isBigTx {
846+
logger.Info("committing tx", "gno", gno, "bigtx", isBigTx, "index", binlogEntry.Index,
847+
"rows", binlogEntryCtx.Rows)
848+
}
824849
if _, err := dbApplier.Db.ExecContext(a.ctx, "commit"); err != nil {
825850
return errors.Wrap(err, "dbApplier.Tx.Commit")
826851
} else {
@@ -830,12 +855,15 @@ func (a *ApplierIncr) ApplyBinlogEvent(workerIdx int, binlogEntryCtx *common.Ent
830855
if a.printTps {
831856
atomic.AddUint32(&a.txLastNSeconds, 1)
832857
}
833-
if a.logTxCommit {
834-
logger.Info("applier tx committed", "gno", binlogEntry.Coordinates.GetGNO())
858+
if a.logTxCommit || isBigTx {
859+
logger.Info("applier tx committed", "gno", gno, "bigtx", isBigTx, "index", binlogEntry.Index,
860+
"rows", binlogEntryCtx.Rows)
835861
} else {
836-
logger.Debug("applier tx committed", "gno", binlogEntry.Coordinates.GetGNO())
862+
logger.Debug("applier tx committed", "gno", gno, "rows", binlogEntryCtx.Rows)
837863
}
838864
atomic.AddUint32(&a.appliedTxCount, 1)
865+
} else {
866+
logger.Info("uncommitted bigtx part", "gno", gno, "index", binlogEntry.Index, "rows", binlogEntryCtx.Rows)
839867
}
840868
a.EntryExecutedHook(binlogEntry)
841869

driver/mysql/binlog/binlog_reader.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ func (b *BinlogReader) handleEvent(ev *replication.BinlogEvent, entriesChannel c
473473
Entry: entry,
474474
TableItems: nil,
475475
OriginalSize: 1, // GroupMaxSize is default to 1 and we send on EntriesSize >= GroupMaxSize
476+
Rows: 0,
476477
}
477478
case replication.QUERY_EVENT:
478479
return b.handleQueryEvent(ev, entriesChannel)
@@ -807,15 +808,19 @@ func (b *BinlogReader) setDtleQuery(query string) string {
807808

808809
func (b *BinlogReader) sendEntry(entriesChannel chan<- *common.EntryContext) {
809810
isBig := b.entryContext.Entry.IsPartOfBigTx()
811+
coordinate := b.entryContext.Entry.Coordinates.(*common.MySQLCoordinateTx)
810812
if isBig {
811813
newVal := atomic.AddInt32(&b.BigTxCount, 1)
812814
if newVal == 1 {
813815
g.AddBigTxJob()
814816
}
817+
b.logger.Info("send bigtx entry", "gno", coordinate.GNO,
818+
"index", b.entryContext.Entry.Index, "final", b.entryContext.Entry.Final, "count", newVal,
819+
"rows", b.entryContext.Rows)
815820
}
816-
coordinate := b.entryContext.Entry.Coordinates.(*common.MySQLCoordinateTx)
817821
b.logger.Debug("sendEntry", "gno", coordinate.GNO, "events", len(b.entryContext.Entry.Events),
818-
"isBig", isBig, "index", b.entryContext.Entry.Index, "final", b.entryContext.Entry.Final)
822+
"isBig", isBig, "index", b.entryContext.Entry.Index, "final", b.entryContext.Entry.Final,
823+
"rows", b.entryContext.Rows)
819824
atomic.AddInt64(b.memory, int64(b.entryContext.Entry.Size()))
820825
select {
821826
case <-b.shutdownCh:
@@ -1913,6 +1918,7 @@ func (b *BinlogReader) handleRowsEvent(ev *replication.BinlogEvent, rowsEvent *r
19131918
switch dml {
19141919
case common.InsertDML, common.DeleteDML:
19151920
if whereTrue0 {
1921+
b.entryContext.Rows += 1
19161922
b.entryContext.OriginalSize += avgRowSize
19171923
dmlEvent.Rows = append(dmlEvent.Rows, row0)
19181924
} else {
@@ -1929,6 +1935,7 @@ func (b *BinlogReader) handleRowsEvent(ev *replication.BinlogEvent, rowsEvent *r
19291935
if !whereTrue0 && !whereTrue1 {
19301936
// append no rows
19311937
} else {
1938+
b.entryContext.Rows += 1
19321939
if whereTrue0 {
19331940
b.entryContext.OriginalSize += avgRowSize
19341941
dmlEvent.Rows = append(dmlEvent.Rows, row0)
@@ -2004,6 +2011,7 @@ func (b *BinlogReader) handleRowsEvent(ev *replication.BinlogEvent, rowsEvent *r
20042011
Entry: entry,
20052012
TableItems: nil,
20062013
OriginalSize: 1,
2014+
Rows: 0,
20072015
}
20082016
}
20092017
}

driver/mysql/extractor.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,11 @@ func (e *Extractor) initNatsPubClient(natsAddr string) (err error) {
654654

655655
ack := &common.BigTxAck{}
656656
_, err = ack.Unmarshal(m.Data)
657-
e.logger.Debug("bigtx_ack", "gno", ack.GNO, "index", ack.Index)
657+
if err != nil {
658+
e.onError(common.TaskStateDead, errors.Wrap(err, "bigtx_ack. Unmarshal"))
659+
}
660+
e.logger.Info("bigtx_ack", "gno", ack.GNO, "index", ack.Index,
661+
"count", atomic.LoadInt32(&e.binlogReader.BigTxCount))
658662

659663
if !e.shutdown {
660664
newVal := atomic.AddInt32(&e.binlogReader.BigTxCount, -1)

driver/mysql/sql/builder.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ func BuildDMLDeleteQuery(databaseName, tableName string, tableColumns *common.Co
8787
args []interface{}, stmt *gosql.Stmt) (result string, columnArgs []interface{}, hasUK bool, err error) {
8888

8989
if !(len(args) >= tableColumns.Len() || len(args) == len(columnMapTo)) {
90-
return result, columnArgs, hasUK, fmt.Errorf("args count differs from table column count in BuildDMLDeleteQuery %v %v %v",
91-
len(args), tableColumns.Len(), len(columnMapTo))
90+
return result, columnArgs, hasUK, fmt.Errorf("BuildDMLDeleteQuery bad args count %v %v %v %v.%v",
91+
len(args), tableColumns.Len(), len(columnMapTo), databaseName, tableName)
9292
}
9393

9494
comparisons := []string{}
@@ -172,13 +172,13 @@ func BuildDMLInsertQuery(databaseName, tableName string, tableColumns *common.Co
172172

173173
if iRow == 0 {
174174
if !(len(args) >= tableColumns.Len() || len(args) == len(columnMapTo)) {
175-
return "", nil, fmt.Errorf("BuildDMLInsertQuery: args count differs from table column count %v %v %v",
176-
len(args), tableColumns.Len(), len(columnMapTo))
175+
return "", nil, fmt.Errorf("BuildDMLInsertQuery: bad args count %v %v %v %v.%v",
176+
len(args), tableColumns.Len(), len(columnMapTo), databaseName, tableName)
177177
}
178178
} else {
179179
if len(args) != len(rows[0]) {
180-
return "", nil, fmt.Errorf("BuildDMLInsertQuery: args count differs from args0 %v %v",
181-
len(args), len(rows[0]))
180+
return "", nil, fmt.Errorf("BuildDMLInsertQuery: args count differs from args0 %v %v %v.%v",
181+
len(args), len(rows[0]), databaseName, tableName)
182182
}
183183
}
184184

0 commit comments

Comments
 (0)