Skip to content

Commit 82ea5d5

Browse files
committed
tla/storage: implement prohibiting recovery and rebalance before sync
This commit prohibits any recovery or rebalancer related stuff on the masters, which have not yet synced with the replicaset after becoming master. Part of #214 NO_DOC=tla NO_TEST=tla
1 parent a7e7851 commit 82ea5d5

File tree

1 file changed

+34
-8
lines changed

1 file changed

+34
-8
lines changed

proofs/tla/src/storage.tla

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ StorageType == [
103103
gcAck : [BucketIds -> SUBSET Storages],
104104
sendWaitTarget : [BucketIds -> [Storages -> Nat]],
105105
sendingBuckets : SUBSET BucketIds,
106+
masterWaitTarget : [Storages -> Nat],
106107
\* For limiting tests, it cannot be done outside of the module, since
107108
\* a test doesn't know, whether the bucket send or ref actually happened.
108109
errinj : [
@@ -158,6 +159,7 @@ StorageInit ==
158159
gcAck |-> [b \in BucketIds |-> {}],
159160
sendWaitTarget |-> [b \in BucketIds |-> [s \in Storages |-> 0]],
160161
sendingBuckets |-> {},
162+
masterWaitTarget |-> [s \in Storages |-> 0],
161163
errinj |-> [
162164
bucketSendCount |-> 0,
163165
bucketRWRefCount |-> 0,
@@ -188,6 +190,7 @@ StorageState(i) == [
188190
gcAck |-> storages[i].gcAck,
189191
sendWaitTarget |-> storages[i].sendWaitTarget,
190192
sendingBuckets |-> storages[i].sendingBuckets,
193+
masterWaitTarget |-> storages[i].masterWaitTarget,
191194
errinj |-> storages[i].errinj
192195
]
193196

@@ -200,7 +203,8 @@ StorageStateApply(i, state) ==
200203
VarSet(i, "gcAck", state.gcAck,
201204
VarSet(i, "sendWaitTarget", state.sendWaitTarget,
202205
VarSet(i, "sendingBuckets", state.sendingBuckets,
203-
VarSet(i, "errinj", state.errinj, storages)))))))))
206+
VarSet(i, "masterWaitTarget", state.masterWaitTarget,
207+
VarSet(i, "errinj", state.errinj, storages))))))))))
204208
/\ network' =
205209
[s \in Storages |->
206210
[t \in Storages |->
@@ -256,8 +260,28 @@ ProcessReplicationBucket(state, j) ==
256260
(* Failover master change *)
257261
(***************************************************************************)
258262

263+
\* True iff this node's current vclock has reached the stored target.
264+
MasterSyncDone(state) ==
265+
\A s \in Storages : state.vclock[s] >= state.masterWaitTarget[s]
266+
267+
\* Max of a non-empty set of Nats.
268+
MaxNat(S) == CHOOSE m \in S : \A x \in S : m >= x
269+
270+
\* Compute the one-shot target when i becomes master:
271+
\* for every source s, the maximum vclock[s] seen among peers at that instant.
272+
MasterCatchupTarget(i) ==
273+
[ s \in Storages |->
274+
IF OtherReplicasInReplicaset(i) = {} THEN 0
275+
ELSE MaxNat({storages[p].vclock[s] : p \in OtherReplicasInReplicaset(i)})
276+
]
277+
278+
\* Until master synchronizes with other repliacas, rebalancer and recovery
279+
\* are not allowed.
259280
BecomeMaster(state) ==
260-
IF ~IsMaster(state) THEN [state EXCEPT !.status = "master"] ELSE state
281+
IF ~IsMaster(state) THEN
282+
LET target == MasterCatchupTarget(state.id) IN
283+
[state EXCEPT !.status = "master", !.masterWaitTarget = target]
284+
ELSE state
261285

262286
BecomeReplica(state) ==
263287
IF IsMaster(state) THEN [state EXCEPT !.status = "replica"] ELSE state
@@ -315,6 +339,7 @@ BucketSendWaitAndSend(state, b, j) ==
315339
/\ b \in state.sendingBuckets
316340
/\ j # state.id
317341
/\ AllReplicasCaughtUp(state, b)
342+
/\ MasterSyncDone(state)
318343
THEN
319344
LET msg == [type |-> "BUCKET_RECV",
320345
content |-> [bucket |-> b, final |-> FALSE]]
@@ -333,7 +358,7 @@ BucketDropFromSending(state, b) ==
333358
BucketSendStart(state, b, j) ==
334359
IF IsMaster(state) /\ state.buckets[b].status = "ACTIVE" /\
335360
state.bucketRefs[b].rw = 0 /\ ~state.bucketRefs[b].rw_lock /\
336-
b \notin state.transferingBuckets /\ j # state.id
361+
b \notin state.transferingBuckets /\ j # state.id /\ MasterSyncDone(state)
337362
THEN LET state1 == [state EXCEPT
338363
!.bucketRefs[b].rw_lock = TRUE,
339364
!.transferingBuckets = @ \union {b},
@@ -348,7 +373,7 @@ BucketSendStart(state, b, j) ==
348373

349374
BucketRecvStart(state, j) ==
350375
LET msg == Head(state.networkReceive[j]) IN
351-
IF msg.type = "BUCKET_RECV" /\ ~msg.content.final THEN
376+
IF msg.type = "BUCKET_RECV" /\ ~msg.content.final /\ MasterSyncDone(state) THEN
352377
LET b == msg.content.bucket IN
353378
IF IsMaster(state) /\ state.buckets[b].status = NULL /\
354379
b \notin state.transferingBuckets
@@ -376,7 +401,7 @@ BucketSendFinish(state, j) ==
376401
LET b == msg.content.bucket
377402
ok == msg.content.status
378403
IN
379-
IF IsMaster(state) /\ b \in state.transferingBuckets THEN
404+
IF IsMaster(state) /\ b \in state.transferingBuckets /\ MasterSyncDone(state) THEN
380405
LET state1 == [state EXCEPT !.networkReceive[j] = Tail(@)] IN
381406
IF ok THEN
382407
\* Receiver accepted the bucket - mark as SENT
@@ -406,7 +431,7 @@ BucketRecvFinish(state, j) ==
406431
IF msg.type = "BUCKET_RECV" /\ msg.content.final THEN
407432
LET b == msg.content.bucket IN
408433
IF IsMaster(state) /\ state.buckets[b].status = "RECEIVING" /\
409-
b \notin state.transferingBuckets
434+
b \notin state.transferingBuckets /\ MasterSyncDone(state)
410435
THEN LET state1 == [state EXCEPT
411436
!.networkReceive[j] = Tail(@)] IN
412437
LET state2 ==
@@ -432,6 +457,7 @@ RecoverySendStatRequest(state, b) ==
432457
/\ state.buckets[b].status \in {"SENDING", "RECEIVING"}
433458
/\ ~(b \in state.transferingBuckets)
434459
/\ dest_rs # NULL
460+
/\ MasterSyncDone(state)
435461
THEN
436462
\* Choose any storage in the destination replicaset.
437463
LET candidates == {s \in Storages :
@@ -447,7 +473,7 @@ ProcessRecoveryStatRequest(state, j) ==
447473
LET msg == Head(state.networkReceive[j]) IN
448474
IF msg.type # "RECOVERY_BUCKET_STAT" THEN state
449475
ELSE
450-
IF ~IsMaster(state) THEN
476+
IF ~IsMaster(state) \/ ~MasterSyncDone(state) THEN
451477
\* Drop message if this node is not a master.
452478
[state EXCEPT !.networkReceive[j] = Tail(@)]
453479
ELSE
@@ -466,7 +492,7 @@ ProcessRecoveryStatResponse(state, j) ==
466492
LET msg == Head(state.networkReceive[j]) IN
467493
IF msg.type # "RECOVERY_BUCKET_STAT_RESPONSE" THEN state
468494
ELSE
469-
IF ~IsMaster(state) THEN
495+
IF ~IsMaster(state) \/ ~MasterSyncDone(state) THEN
470496
\* Drop message if this node is not a master.
471497
[state EXCEPT !.networkReceive[j] = Tail(@)]
472498
ELSE

0 commit comments

Comments
 (0)