Skip to content

Commit 7dcd08d

Browse files
committed
Implement automatic recovery for binlog corruption in DbReplicatorRealtime
- Added error handling for OperationalError (Error 1236) to detect binlog index file corruption.
- Implemented automatic deletion of the corrupted binlog directory and clean exit for process restart.
- Enhanced logging for better diagnostics during recovery attempts.
1 parent 5821f7b commit 7dcd08d

File tree

1 file changed

+42
-1
lines changed

1 file changed

+42
-1
lines changed

mysql_ch_replicator/db_replicator_realtime.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import json
2+
import os
3+
import shutil
24
import time
35
from collections import defaultdict
46
from logging import getLogger
57

8+
import pymysql.err
9+
610
from .binlog_replicator import EventType, LogEvent
711
from .common import Status
812
from .converter import strip_sql_comments
@@ -70,7 +74,44 @@ def run_realtime_replication(self):
7074
)
7175
break
7276

73-
event = self.replicator.data_reader.read_next_event()
77+
try:
78+
event = self.replicator.data_reader.read_next_event()
79+
except pymysql.err.OperationalError as e:
80+
# Error 1236 is MySQL's general fatal binlog-read error; only the code is checked here, and any 1236 is treated as binlog index file corruption
81+
if e.args[0] == 1236:
82+
logger.error(
83+
"[binlogrepl] operational error (1236, 'Could not find first log file name in binary log index file')"
84+
)
85+
logger.error(f"[binlogrepl] Full error: {e}")
86+
logger.info("[binlogrepl] Attempting automatic recovery...")
87+
88+
# Get binlog directory path for this database
89+
binlog_dir = os.path.join(
90+
self.replicator.config.binlog_replicator.data_dir,
91+
self.replicator.database
92+
)
93+
94+
# Delete the corrupted binlog directory
95+
if os.path.exists(binlog_dir):
96+
logger.warning(f"[binlogrepl] Deleting corrupted binlog directory: {binlog_dir}")
97+
try:
98+
shutil.rmtree(binlog_dir)
99+
logger.info(f"[binlogrepl] Successfully deleted binlog directory: {binlog_dir}")
100+
except Exception as delete_error:
101+
logger.error(f"[binlogrepl] Failed to delete binlog directory: {delete_error}", exc_info=True)
102+
raise RuntimeError("Failed to delete corrupted binlog directory") from delete_error
103+
else:
104+
logger.warning(f"[binlogrepl] Binlog directory does not exist: {binlog_dir}")
105+
106+
# Raise RuntimeError to abort this run; the runner is expected to restart the process (NOTE: this is an exception, not a clean exit)
107+
logger.info("[binlogrepl] Exiting process for automatic restart by runner")
108+
logger.info("[binlogrepl] The runner will automatically restart this process")
109+
raise RuntimeError("Binlog corruption detected (Error 1236) - restarting for recovery") from e
110+
else:
111+
# Re-raise other OperationalErrors
112+
logger.error(f"[binlogrepl] Unhandled OperationalError: {e}", exc_info=True)
113+
raise
114+
74115
if event is None:
75116
time.sleep(self.READ_LOG_INTERVAL)
76117
self.upload_records_if_required(table_name=None)

0 commit comments

Comments
 (0)