From aade4845ab3e4c250b055a85ce0ba699ba4c7241 Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Thu, 6 Nov 2025 17:50:38 -0500 Subject: [PATCH 1/3] IndexingMerger: lessen work and retry by default Catching an exception, the IndexingMerger attempts to detect if it should lessen work and retry. Currently it is an include list of known "retyable" errors. If the error is not detected correctly, or not predicted, it will not retry. Being a background process, however, it is better and safer to retry by default. Resolves #3731 --- .../provider/foundationdb/IndexingBase.java | 18 ------------------ .../provider/foundationdb/IndexingMerger.java | 8 +++++++- .../foundationdb/IndexingThrottle.java | 19 ++++++++++++++++++- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingBase.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingBase.java index 5095076d57..b23228f837 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingBase.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingBase.java @@ -20,7 +20,6 @@ package com.apple.foundationdb.record.provider.foundationdb; -import com.apple.foundationdb.FDBError; import com.apple.foundationdb.FDBException; import com.apple.foundationdb.MutationType; import com.apple.foundationdb.annotation.API; @@ -59,7 +58,6 @@ import java.time.DateTimeException; import java.time.Instant; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -1213,22 +1211,6 @@ protected static T findException(@Nullable Throwable ex, Class classT) { } return null; } - - protected static boolean shouldLessenWork(@Nullable FDBException ex) { - // These error codes represent a list of errors that can occur if there is too much work to be done - // in a single transaction. - if (ex == null) { - return false; - } - final Set lessenWorkCodes = new HashSet<>(Arrays.asList( - FDBError.TIMED_OUT.code(), - FDBError.TRANSACTION_TOO_OLD.code(), - FDBError.NOT_COMMITTED.code(), - FDBError.TRANSACTION_TIMED_OUT.code(), - FDBError.COMMIT_READ_INCOMPLETE.code(), - FDBError.TRANSACTION_TOO_LARGE.code())); - return lessenWorkCodes.contains(ex.getCode()); - } } diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java index b3a4cb6dc6..f4cc82fe6a 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java @@ -150,7 +150,7 @@ private CompletableFuture handleFailure(final IndexDeferredMaintenanceC mergeControl.mergeHadFailed(); // report to adjust stats final FDBException ex = IndexingBase.findException(e, FDBException.class); final IndexDeferredMaintenanceControl.LastStep lastStep = mergeControl.getLastStep(); - if (!IndexingBase.shouldLessenWork(ex)) { + if (shouldAbort(ex)) { giveUpMerging(mergeControl, e); } switch (lastStep) { @@ -175,6 +175,12 @@ private CompletableFuture handleFailure(final IndexDeferredMaintenanceC return AsyncUtil.READY_TRUE; // and retry } + @SuppressWarnings("unused") + private boolean shouldAbort(@Nullable FDBException ex) { + // TODO: return true if the exception is clearly not retriable + return false; + } + private void handleRepartitioningFailure(final IndexDeferredMaintenanceControl mergeControl, Throwable e) { repartitionDocumentCount = mergeControl.getRepartitionDocumentCount(); if (repartitionDocumentCount == -1) { diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingThrottle.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingThrottle.java index c9e76ee1ef..eecb6ca7db 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingThrottle.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingThrottle.java @@ -20,6 +20,7 @@ package com.apple.foundationdb.record.provider.foundationdb; +import com.apple.foundationdb.FDBError; import com.apple.foundationdb.FDBException; import com.apple.foundationdb.annotation.API; import com.apple.foundationdb.async.AsyncUtil; @@ -133,7 +134,7 @@ boolean mayRetryAfterHandlingException(@Nullable FDBException fdbException, @Nullable List additionalLogMessageKeyValues, int currTries, final boolean adjustLimits) { - if (currTries >= common.config.getMaxRetries() || !IndexingBase.shouldLessenWork(fdbException)) { + if (currTries >= common.config.getMaxRetries() || !shouldLessenWork(fdbException)) { // Here: should not retry or no more retries. There is no real need to handle limits. return false; } @@ -144,6 +145,22 @@ boolean mayRetryAfterHandlingException(@Nullable FDBException fdbException, return true; } + private static boolean shouldLessenWork(@Nullable FDBException ex) { + // These error codes represent a list of errors that can occur if there is too much work to be done + // in a single transaction. + if (ex == null) { + return false; + } + final Set lessenWorkCodes = new HashSet<>(Arrays.asList( + FDBError.TIMED_OUT.code(), + FDBError.TRANSACTION_TOO_OLD.code(), + FDBError.NOT_COMMITTED.code(), + FDBError.TRANSACTION_TIMED_OUT.code(), + FDBError.COMMIT_READ_INCOMPLETE.code(), + FDBError.TRANSACTION_TOO_LARGE.code())); + return lessenWorkCodes.contains(ex.getCode()); + } + void decreaseLimit(@Nonnull FDBException fdbException, @Nullable List additionalLogMessageKeyValues) { // TODO: decrease the limit only for certain errors From 06ff45155c4f2a2c840a576de6213c5a13b4d537 Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Thu, 6 Nov 2025 18:31:59 -0500 Subject: [PATCH 2/3] Backward compatible - abort if the exception is not an FDBException --- .../record/provider/foundationdb/IndexingMerger.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java index f4cc82fe6a..e08910e2b5 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java @@ -175,8 +175,10 @@ private CompletableFuture handleFailure(final IndexDeferredMaintenanceC return AsyncUtil.READY_TRUE; // and retry } - @SuppressWarnings("unused") private boolean shouldAbort(@Nullable FDBException ex) { + if (ex == null) { + return true; + } // TODO: return true if the exception is clearly not retriable return false; } From 168a3121f5cb1bc465e530ec4fcdbd49f9eb4845 Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Mon, 10 Nov 2025 18:27:10 -0500 Subject: [PATCH 3/3] Tmp kludge for testing --- .../provider/foundationdb/IndexingMerger.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java index e08910e2b5..ae873586e0 100644 --- a/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java +++ b/fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingMerger.java @@ -148,9 +148,8 @@ private CompletableFuture handleFailure(final IndexDeferredMaintenanceC // merges. Not perfect, but as long as it's rare the impact should be minimal. mergeControl.mergeHadFailed(); // report to adjust stats - final FDBException ex = IndexingBase.findException(e, FDBException.class); final IndexDeferredMaintenanceControl.LastStep lastStep = mergeControl.getLastStep(); - if (shouldAbort(ex)) { + if (shouldAbort(e)) { giveUpMerging(mergeControl, e); } switch (lastStep) { @@ -175,7 +174,17 @@ private CompletableFuture handleFailure(final IndexDeferredMaintenanceC return AsyncUtil.READY_TRUE; // and retry } - private boolean shouldAbort(@Nullable FDBException ex) { + private boolean shouldAbort(@Nullable Throwable e) { + if (e == null) { + return true; + } + // Expecting AsyncToSyncTimeoutException or an instance of TimeoutException. However, cannot + // refer to AsyncToSyncTimeoutException without creating a lucene dependency + // TODO: remove this kludge + if (e.getClass().getCanonicalName().contains("Timeout") ) { + return false; + } + final FDBException ex = IndexingBase.findException(e, FDBException.class); if (ex == null) { return true; }