@@ -882,45 +882,74 @@ func RunCommitTrigger(
882882 "commit wait. Was its timestamp bumped after acquiring latches?" , txn , ct .Kind ())
883883 }
884884
885+ // Used by both splits and merges.
886+ maybeWrapReplicaCorruptionError := func (ctx context.Context , err error ) error {
887+ if err == nil {
888+ log .KvExec .Fatalf (ctx , "unexpected nil error" )
889+ }
890+ if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
891+ // Data corruption errors due to external SSTable references getting
892+ // deleted should not be wrapped in replica corruption errors. This
893+ // ensures that we simply fail the split or merge and propagate the error,
894+ // but don't crash the process. In such cases, an excise command should be
895+ // used to get out of this data corruption situation.
896+ return err
897+ }
898+ // Otherwise, fail the split or merge with a critical error that crashes the
899+ // process. Reporting a replica corruption error ensures this. See
900+ // setCorruptRaftMuLocked.
901+ return kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
902+ }
903+
885904 // Stage the commit trigger's side-effects so that they will go into effect on
886905 // each Replica when the corresponding Raft log entry is applied. Only one
887906 // commit trigger can be set.
888907 if ct .GetSplitTrigger () != nil {
908+ sl := MakeStateLoader (rec )
909+ lhsLease , err := sl .LoadLease (ctx , batch )
910+ if err != nil {
911+ return result.Result {}, maybeWrapReplicaCorruptionError (
912+ ctx , errors .Wrap (err , "unable to load lease" ),
913+ )
914+ }
915+ gcThreshold , err := sl .LoadGCThreshold (ctx , batch )
916+ if err != nil {
917+ return result.Result {}, maybeWrapReplicaCorruptionError (
918+ ctx , errors .Wrap (err , "unable to load GCThreshold" ),
919+ )
920+ }
921+ gcHint , err := sl .LoadGCHint (ctx , batch )
922+ if err != nil {
923+ return result.Result {}, maybeWrapReplicaCorruptionError (
924+ ctx , errors .Wrap (err , "unable to load GCHint" ),
925+ )
926+ }
927+ replicaVersion , err := sl .LoadVersion (ctx , batch )
928+ if err != nil {
929+ return result.Result {}, maybeWrapReplicaCorruptionError (
930+ ctx , errors .Wrap (err , "unable to load replica version" ),
931+ )
932+ }
933+ in := splitTriggerHelperInput {
934+ leftLease : lhsLease ,
935+ gcThreshold : gcThreshold ,
936+ gcHint : gcHint ,
937+ replicaVersion : replicaVersion ,
938+ }
939+
889940 newMS , res , err := splitTrigger (
890- ctx , rec , batch , * ms , ct .SplitTrigger , txn .WriteTimestamp ,
941+ ctx , rec , batch , * ms , ct .SplitTrigger , in , txn .WriteTimestamp ,
891942 )
892943 if err != nil {
893- if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
894- // We want to handle the data corruption error here because it's possible
895- // that a file that an external SSTable references got deleted. We want to
896- // fail the split and propagate the error, but we don't want to crash the
897- // process. An excise command could be used to get out of this data
898- // corruption.
899- return result.Result {}, err
900- } else {
901- // Otherwise, failing the split is a critical error. We should crash
902- // the process and report a replica corruption.
903- return result.Result {}, kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
904- }
944+ return result.Result {}, maybeWrapReplicaCorruptionError (ctx , err )
905945 }
906946 * ms = newMS
907947 return res , nil
908948 }
909949 if mt := ct .GetMergeTrigger (); mt != nil {
910950 res , err := mergeTrigger (ctx , rec , batch , ms , mt , txn .WriteTimestamp )
911951 if err != nil {
912- if info := pebble .ExtractDataCorruptionInfo (err ); info != nil {
913- // We want to handle the data corruption error here because it's
914- // possible that a file that an external SSTable references got deleted.
915- // We want to fail the merge and propagate the error, but we don't want
916- // to crash the process. An excise command could be used to get out of
917- // this data corruption.
918- return result.Result {}, err
919- } else {
920- // Otherwise, failing the merge is a critical error. We should crash
921- // the process and report a replica corruption.
922- return result.Result {}, kvpb .MaybeWrapReplicaCorruptionError (ctx , err )
923- }
952+ return result.Result {}, maybeWrapReplicaCorruptionError (ctx , err )
924953 }
925954 return res , nil
926955 }
@@ -1135,6 +1164,7 @@ func splitTrigger(
11351164 batch storage.Batch ,
11361165 bothDeltaMS enginepb.MVCCStats ,
11371166 split * roachpb.SplitTrigger ,
1167+ in splitTriggerHelperInput ,
11381168 ts hlc.Timestamp ,
11391169) (enginepb.MVCCStats , result.Result , error ) {
11401170 desc := rec .Desc ()
@@ -1210,7 +1240,7 @@ func splitTrigger(
12101240 MaxBytesDiff : MaxMVCCStatBytesDiff .Get (& rec .ClusterSettings ().SV ),
12111241 UseEstimatesBecauseExternalBytesArePresent : split .UseEstimatesBecauseExternalBytesArePresent ,
12121242 }
1213- return splitTriggerHelper (ctx , rec , batch , h , split , ts )
1243+ return splitTriggerHelper (ctx , rec , batch , in , h , split , ts )
12141244}
12151245
12161246// splitScansRightForStatsFirst controls whether the left hand side or the right
@@ -1246,13 +1276,23 @@ func makeScanStatsFn(
12461276 }
12471277}
12481278
1279+ // splitTriggerHelperInput contains metadata needed by the RHS when running the
1280+ // splitTriggerHelper.
1281+ type splitTriggerHelperInput struct {
1282+ leftLease roachpb.Lease
1283+ gcThreshold * hlc.Timestamp
1284+ gcHint * roachpb.GCHint
1285+ replicaVersion roachpb.Version
1286+ }
1287+
12491288// splitTriggerHelper continues the work begun by splitTrigger, but has a
12501289// reduced scope that has all stats-related concerns bundled into a
12511290// splitStatsHelper.
12521291func splitTriggerHelper (
12531292 ctx context.Context ,
12541293 rec EvalContext ,
12551294 batch storage.Batch ,
1295+ in splitTriggerHelperInput ,
12561296 statsInput splitStatsHelperInput ,
12571297 split * roachpb.SplitTrigger ,
12581298 ts hlc.Timestamp ,
@@ -1394,27 +1434,22 @@ func splitTriggerHelper(
13941434 // - node two becomes the lease holder for [c,e). Its timestamp cache does
13951435 // not know about the read at 'd' which happened at the beginning.
13961436 // - node two can illegally propose a write to 'd' at a lower timestamp.
1397- sl := MakeStateLoader (rec )
1398- leftLease , err := sl .LoadLease (ctx , batch )
1399- if err != nil {
1400- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load lease" )
1401- }
1402- if leftLease .Empty () {
1437+ if in .leftLease .Empty () {
14031438 log .KvExec .Fatalf (ctx , "LHS of split has no lease" )
14041439 }
14051440
14061441 // Copy the lease from the left-hand side of the split over to the
14071442 // right-hand side so that it can immediately start serving requests.
14081443 // When doing so, we need to make a few modifications.
1409- rightLease := leftLease
1444+ rightLease := in . leftLease
14101445 // Rebind the lease to the existing leaseholder store's replica from the
14111446 // right-hand side's descriptor.
14121447 var ok bool
1413- rightLease .Replica , ok = split .RightDesc .GetReplicaDescriptor (leftLease .Replica .StoreID )
1448+ rightLease .Replica , ok = split .RightDesc .GetReplicaDescriptor (in . leftLease .Replica .StoreID )
14141449 if ! ok {
14151450 return enginepb.MVCCStats {}, result.Result {}, errors .Errorf (
14161451 "pre-split lease holder %+v not found in post-split descriptor %+v" ,
1417- leftLease .Replica , split .RightDesc ,
1452+ in . leftLease .Replica , split .RightDesc ,
14181453 )
14191454 }
14201455 // Convert leader leases into expiration-based leases. A leader lease is
@@ -1429,18 +1464,9 @@ func splitTriggerHelper(
14291464 rightLease .Term = 0
14301465 rightLease .MinExpiration = hlc.Timestamp {}
14311466 }
1432-
1433- gcThreshold , err := sl .LoadGCThreshold (ctx , batch )
1434- if err != nil {
1435- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load GCThreshold" )
1436- }
1437- if gcThreshold .IsEmpty () {
1467+ if in .gcThreshold .IsEmpty () {
14381468 log .VEventf (ctx , 1 , "LHS's GCThreshold of split is not set" )
14391469 }
1440- gcHint , err := sl .LoadGCHint (ctx , batch )
1441- if err != nil {
1442- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load GCHint" )
1443- }
14441470
14451471 // Writing the initial state is subtle since this also seeds the Raft
14461472 // group. It becomes more subtle due to proposer-evaluated Raft.
@@ -1471,13 +1497,9 @@ func splitTriggerHelper(
14711497 // HardState via a call to synthesizeRaftState. Here, we only call
14721498 // writeInitialReplicaState which essentially writes a ReplicaState
14731499 // only.
1474- replicaVersion , err := sl .LoadVersion (ctx , batch )
1475- if err != nil {
1476- return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to load replica version" )
1477- }
14781500 if * h .AbsPostSplitRight (), err = stateloader .WriteInitialReplicaState (
14791501 ctx , batch , * h .AbsPostSplitRight (), split .RightDesc , rightLease ,
1480- * gcThreshold , * gcHint , replicaVersion ,
1502+ * in . gcThreshold , * in . gcHint , in . replicaVersion ,
14811503 ); err != nil {
14821504 return enginepb.MVCCStats {}, result.Result {}, errors .Wrap (err , "unable to write initial Replica state" )
14831505 }
0 commit comments