Skip to content

Commit 2c2e73e

Browse files
committed
feat: add config to control the memory usage for raft pending
Change-Id: I2cca5ade3c608ab4cf064ea551da5343a3f7fd56
1 parent d34f270 commit 2c2e73e

File tree

6 files changed

+98
-1
lines changed

6 files changed

+98
-1
lines changed

dledger/src/main/java/io/openmessaging/storage/dledger/DLedgerConfig.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,18 @@ public class DLedgerConfig {
4848

4949
private int peerPushQuota = 20 * 1024 * 1024;
5050

51+
private int peerPushPendingMaxBytes = 200 * 1024 * 1024;
52+
5153
private String storeType = FILE; //FILE, MEMORY
5254
private String dataStorePath;
5355

5456
private int maxPendingRequestsNum = 10000;
5557

56-
private int maxWaitAckTimeMs = 2500;
58+
private int maxPendingCommitIndexNum = 800000;
59+
60+
private int maxPendingCommitBytes = 500 * 1024 * 1024;
61+
62+
private int maxWaitAckTimeMs = 5000;
5763

5864
private int maxPushTimeOutMs = 1000;
5965

@@ -255,6 +261,22 @@ public void setMaxPendingRequestsNum(int maxPendingRequestsNum) {
255261
this.maxPendingRequestsNum = maxPendingRequestsNum;
256262
}
257263

264+
public int getMaxPendingCommitIndexNum() {
265+
return maxPendingCommitIndexNum;
266+
}
267+
268+
public void setMaxPendingCommitIndexNum(int max) {
269+
this.maxPendingCommitIndexNum = max;
270+
}
271+
272+
public int getMaxPendingCommitBytes() {
273+
return maxPendingCommitBytes;
274+
}
275+
276+
public void setMaxPendingCommitBytes(int max) {
277+
this.maxPendingCommitBytes = max;
278+
}
279+
258280
public int getMaxWaitAckTimeMs() {
259281
return maxWaitAckTimeMs;
260282
}
@@ -371,6 +393,14 @@ public void setPeerPushThrottlePoint(int peerPushThrottlePoint) {
371393
this.peerPushThrottlePoint = peerPushThrottlePoint;
372394
}
373395

396+
public int getPeerPushPendingMaxBytes() {
397+
return peerPushPendingMaxBytes;
398+
}
399+
400+
public void setPeerPushPendingMaxBytes(int peerPushPendingMaxBytes) {
401+
this.peerPushPendingMaxBytes = peerPushPendingMaxBytes;
402+
}
403+
374404
public int getPeerPushQuota() {
375405
return peerPushQuota;
376406
}

dledger/src/main/java/io/openmessaging/storage/dledger/DLedgerEntryPusher.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io.openmessaging.storage.dledger.entry.DLedgerEntry;
2626
import io.openmessaging.storage.dledger.exception.DLedgerException;
2727
import io.openmessaging.storage.dledger.metrics.DLedgerMetricsManager;
28+
import io.openmessaging.storage.dledger.protocol.AppendEntryResponse;
2829
import io.openmessaging.storage.dledger.protocol.DLedgerResponseCode;
2930
import io.openmessaging.storage.dledger.protocol.InstallSnapshotRequest;
3031
import io.openmessaging.storage.dledger.protocol.InstallSnapshotResponse;
@@ -168,8 +169,22 @@ public long getPeerWaterMark(long term, String peerId) {
168169
}
169170
}
170171

172+
public int getPendingCount(long currTerm) {
173+
if (pendingClosure == null) {
174+
return 0;
175+
}
176+
ConcurrentMap<Long, Closure> pendings = pendingClosure.get(currTerm);
177+
if (pendings == null) {
178+
return 0;
179+
}
180+
return pendings.size();
181+
}
182+
171183
public boolean isPendingFull(long currTerm) {
172184
checkTermForPendingMap(currTerm, "isPendingFull");
185+
if (dLedgerStore.isLocalToomuchUncommitted()) {
186+
return true;
187+
}
173188
return pendingClosure.get(currTerm).size() > dLedgerConfig.getMaxPendingRequestsNum();
174189
}
175190

@@ -227,17 +242,30 @@ public void checkResponseFuturesTimeout(final long beginIndex) {
227242
}
228243
ConcurrentMap<Long, Closure> closureMap = this.pendingClosure.get(term);
229244
if (closureMap != null && closureMap.size() > 0) {
245+
boolean anyChecked = false;
230246
for (long i = beginIndex; i < maxIndex; i++) {
231247
Closure closure = closureMap.get(i);
232248
if (closure == null) {
233249
// index may be removed for complete, we should continue scan
234250
} else if (closure.isTimeOut()) {
235251
closure.done(Status.error(DLedgerResponseCode.WAIT_QUORUM_ACK_TIMEOUT));
236252
closureMap.remove(i);
253+
anyChecked = true;
237254
} else {
255+
anyChecked = true;
238256
break;
239257
}
240258
}
259+
if (!anyChecked) {
260+
// since the batch append may have index discontinuous, we should check here
261+
for (Map.Entry<Long, Closure> futureEntry : closureMap.entrySet()) {
262+
Closure closure = futureEntry.getValue();
263+
if (closure.isTimeOut()) {
264+
closure.done(Status.error(DLedgerResponseCode.WAIT_QUORUM_ACK_TIMEOUT));
265+
closureMap.remove(futureEntry.getKey());
266+
}
267+
}
268+
}
241269
}
242270
}
243271

@@ -334,6 +362,7 @@ public void doWork() {
334362
// advance the commit index
335363
// we can only commit the index whose term is equals to current term (refer to raft paper 5.4.2)
336364
if (DLedgerEntryPusher.this.memberState.leaderUpdateCommittedIndex(currTerm, quorumIndex)) {
365+
dLedgerStore.updateCommittedIndex(quorumIndex);
337366
DLedgerEntryPusher.this.fsmCaller.onCommitted(quorumIndex);
338367
} else {
339368
// If the commit index is not advanced, we should wait for the next round
@@ -941,6 +970,7 @@ private void handleDoAppend(long writeIndex, PushEntryRequest request,
941970
future.complete(buildResponse(request, DLedgerResponseCode.SUCCESS.getCode()));
942971
long committedIndex = Math.min(dLedgerStore.getLedgerEndIndex(), request.getCommitIndex());
943972
if (DLedgerEntryPusher.this.memberState.followerUpdateCommittedIndex(committedIndex)) {
973+
dLedgerStore.updateCommittedIndex(committedIndex);
944974
DLedgerEntryPusher.this.fsmCaller.onCommitted(committedIndex);
945975
}
946976
} catch (Throwable t) {
@@ -1004,6 +1034,7 @@ private CompletableFuture<PushEntryResponse> handleDoCommit(long committedIndex,
10041034
PreConditions.check(request.getType() == PushEntryRequest.Type.COMMIT, DLedgerResponseCode.UNKNOWN);
10051035
committedIndex = committedIndex <= dLedgerStore.getLedgerEndIndex() ? committedIndex : dLedgerStore.getLedgerEndIndex();
10061036
if (DLedgerEntryPusher.this.memberState.followerUpdateCommittedIndex(committedIndex)) {
1037+
dLedgerStore.updateCommittedIndex(committedIndex);
10071038
DLedgerEntryPusher.this.fsmCaller.onCommitted(committedIndex);
10081039
}
10091040
future.complete(buildResponse(request, DLedgerResponseCode.SUCCESS.getCode()));
@@ -1024,6 +1055,7 @@ private CompletableFuture<PushEntryResponse> handleDoTruncate(long truncateIndex
10241055
future.complete(buildResponse(request, DLedgerResponseCode.SUCCESS.getCode()));
10251056
long committedIndex = request.getCommitIndex() <= dLedgerStore.getLedgerEndIndex() ? request.getCommitIndex() : dLedgerStore.getLedgerEndIndex();
10261057
if (DLedgerEntryPusher.this.memberState.followerUpdateCommittedIndex(committedIndex)) {
1058+
dLedgerStore.updateCommittedIndex(committedIndex);
10271059
DLedgerEntryPusher.this.fsmCaller.onCommitted(committedIndex);
10281060
}
10291061
} catch (Throwable t) {

dledger/src/main/java/io/openmessaging/storage/dledger/DLedgerServer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ public AppendFuture<AppendEntryResponse> appendAsLeader(List<byte[]> bodies) thr
354354
}
355355
// only wait last entry ack is ok
356356
future = new BatchAppendFuture<>(positions);
357+
long batchTimeout = (long)dLedgerConfig.getMaxWaitAckTimeMs() * (positions.length / 1000 + 1);
358+
future.setTimeOutMs(batchTimeout);
357359
} else {
358360
DLedgerEntry dLedgerEntry = new DLedgerEntry();
359361
totalBytes += bodies.get(0).length;

dledger/src/main/java/io/openmessaging/storage/dledger/store/DLedgerMemoryStore.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,9 @@ public void flush() {
184184
public long getLedgerEndTerm() {
185185
return ledgerEndTerm;
186186
}
187+
188+
@Override
189+
public boolean isLocalToomuchUncommitted() {
190+
return getLedgerEndIndex() - memberState.getCommittedIndex() > dLedgerConfig.getMaxPendingCommitIndexNum();
191+
}
187192
}

dledger/src/main/java/io/openmessaging/storage/dledger/store/DLedgerStore.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ public abstract class DLedgerStore {
2929

3030
public abstract DLedgerEntry get(Long index);
3131

32+
// should check both index and uncommitted data size
33+
public boolean isLocalToomuchUncommitted() {
34+
return false;
35+
}
36+
3237
public abstract long getLedgerEndTerm();
3338

3439
public abstract long getLedgerEndIndex();
@@ -37,6 +42,8 @@ public abstract class DLedgerStore {
3742

3843
public abstract long getLedgerBeforeBeginTerm();
3944

45+
public void updateCommittedIndex(long index) {}
46+
4047
protected void updateLedgerEndIndexAndTerm() {
4148
if (getMemberState() != null) {
4249
getMemberState().updateLedgerIndexAndTerm(getLedgerEndIndex(), getLedgerEndTerm());

dledger/src/main/java/io/openmessaging/storage/dledger/store/file/DLedgerMmapFileStore.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class DLedgerMmapFileStore extends DLedgerStore {
5050

5151
private long ledgerEndIndex = -1;
5252
private long ledgerEndTerm;
53+
private long committedPos;
5354
private final DLedgerConfig dLedgerConfig;
5455
private final MemberState memberState;
5556
private final MmapFileList dataFileList;
@@ -654,6 +655,16 @@ public DLedgerEntry getFirstLogOfTargetTerm(long targetTerm, long endIndex) {
654655
return entry;
655656
}
656657

658+
@Override
659+
public void updateCommittedIndex(long index) {
660+
Pair<Long, Integer> posAndSize = getEntryPosAndSize(index);
661+
this.committedPos = posAndSize.getKey() + posAndSize.getValue();
662+
}
663+
664+
private long getCommittedPos() {
665+
return this.committedPos;
666+
}
667+
657668
private Pair<Long, Integer> getEntryPosAndSize(Long index) {
658669
indexCheck(index);
659670
SelectMmapBufferResult indexSbr = null;
@@ -681,6 +692,16 @@ public long getLedgerEndTerm() {
681692
return ledgerEndTerm;
682693
}
683694

695+
@Override
696+
public boolean isLocalToomuchUncommitted() {
697+
long fallBehindBytes = dataFileList.getMaxWrotePosition() - getCommittedPos();
698+
if (fallBehindBytes > dLedgerConfig.getMaxPendingCommitBytes()) {
699+
return true;
700+
}
701+
702+
return getLedgerEndIndex() - memberState.getCommittedIndex() > dLedgerConfig.getMaxPendingCommitIndexNum();
703+
}
704+
684705
public void addAppendHook(AppendHook writeHook) {
685706
if (!appendHooks.contains(writeHook)) {
686707
appendHooks.add(writeHook);

0 commit comments

Comments
 (0)