From b1198bc9f30c06426157036a7dc7a9f90e3bd9fc Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Fri, 31 Oct 2025 14:30:58 -0400 Subject: [PATCH 1/4] Prevent parallel file lock clear FDBDirectory: When file lock clear is called in parallel by multiple threads, synchronically waiting for the first thread's clear to complete seems to be a better strategy. Resolves #3713 --- .../directory/FDBDirectoryLockFactory.java | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java index ca57a79140..e43a1a320f 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java @@ -210,40 +210,44 @@ private void fileLockCheckNewLock(byte[] val, long nowMillis) { } private void fileLockClearFlushAndClose(boolean isRecovery) { - Function> fileLockFunc = aContext -> - aContext.ensureActive().get(fileLockKey) - .thenAccept(val -> { - synchronized (fileLockSetLock) { + synchronized (fileLockSetLock) { + // If called by multiple threads, it seems to be a good idea to block all and allow one of them to do the actual closing. + if (closed) { + return; + } + Function> fileLockFunc = aContext -> + aContext.ensureActive().get(fileLockKey) + .thenAccept(val -> { UUID existingUuid = fileLockValueToUuid(val); if (existingUuid != null && existingUuid.compareTo(selfStampUuid) == 0) { // clear the lock if locked and matches uuid aContext.ensureActive().clear(fileLockKey); closingContext = aContext; logSelf(isRecovery ? "FileLock: Cleared in Recovery path" : "FileLock: Cleared"); - } else if (! isRecovery) { + } else if (!isRecovery) { throw new AlreadyClosedException("FileLock: Expected to be locked during close.This=" + this + " existingUuid=" + existingUuid); // The string append methods should handle null arguments. } - } - }); + }); - if (agilityContext.isClosed()) { - // Here: this is considered to be a recovery path, may bypass closed context. - agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, - agilityContext.applyInRecoveryPath(fileLockFunc)); - } else { - // Here: this called during directory close to ensure cleared lock - - agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, - agilityContext.apply(fileLockFunc)); - } - agilityContext.recordEvent(LuceneEvents.Events.LUCENE_FILE_LOCK_DURATION, System.nanoTime() - lockStartTime); - boolean flushed = false; - try { - closed = true; // prevent lock stamp update - agilityContext.flush(); - flushed = true; - } finally { - closed = flushed; // allow close retry - closingContext = null; + if (agilityContext.isClosed()) { + // Here: this is considered to be a recovery path, may bypass closed context. + agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, + agilityContext.applyInRecoveryPath(fileLockFunc)); + } else { + // Here: this called during directory close to ensure cleared lock - + agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, + agilityContext.apply(fileLockFunc)); + } + agilityContext.recordEvent(LuceneEvents.Events.LUCENE_FILE_LOCK_DURATION, System.nanoTime() - lockStartTime); + boolean flushed = false; + try { + closed = true; // prevent lock stamp update + agilityContext.flush(); + flushed = true; + } finally { + closed = flushed; // allow close retry + closingContext = null; + } } } From 267a74470bd86e62da37c09844c8139ad2496ebf Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Sat, 1 Nov 2025 03:54:24 -0400 Subject: [PATCH 2/4] Use recursion guard --- .../directory/FDBDirectoryLockFactory.java | 68 +++++++++++-------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java index e43a1a320f..5884c6a437 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java @@ -92,6 +92,7 @@ protected static class FDBDirectoryLock extends Lock { */ private FDBRecordContext closingContext = null; private final Object fileLockSetLock = new Object(); + private boolean clearingLockNow = false; private FDBDirectoryLock(final AgilityContext agilityContext, final String lockName, byte[] fileLockKey, int timeWindowMilliseconds) { this.agilityContext = agilityContext; @@ -209,45 +210,54 @@ private void fileLockCheckNewLock(byte[] val, long nowMillis) { } } + private synchronized boolean inRecursiveClearLock() { + if (clearingLockNow) { + return true; + } + clearingLockNow = true; + return false; + } + private void fileLockClearFlushAndClose(boolean isRecovery) { - synchronized (fileLockSetLock) { - // If called by multiple threads, it seems to be a good idea to block all and allow one of them to do the actual closing. - if (closed) { - return; - } - Function> fileLockFunc = aContext -> - aContext.ensureActive().get(fileLockKey) - .thenAccept(val -> { + if (inRecursiveClearLock()) { + // Here: this function is being called from too many paths. Until cleanup, this guard is here to avoid recursions + return; + } + Function> fileLockFunc = aContext -> + aContext.ensureActive().get(fileLockKey) + .thenAccept(val -> { + synchronized (fileLockSetLock) { UUID existingUuid = fileLockValueToUuid(val); if (existingUuid != null && existingUuid.compareTo(selfStampUuid) == 0) { // clear the lock if locked and matches uuid aContext.ensureActive().clear(fileLockKey); closingContext = aContext; logSelf(isRecovery ? "FileLock: Cleared in Recovery path" : "FileLock: Cleared"); - } else if (!isRecovery) { + } else if (! isRecovery) { throw new AlreadyClosedException("FileLock: Expected to be locked during close.This=" + this + " existingUuid=" + existingUuid); // The string append methods should handle null arguments. } - }); + } + }); - if (agilityContext.isClosed()) { - // Here: this is considered to be a recovery path, may bypass closed context. - agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, - agilityContext.applyInRecoveryPath(fileLockFunc)); - } else { - // Here: this called during directory close to ensure cleared lock - - agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, - agilityContext.apply(fileLockFunc)); - } - agilityContext.recordEvent(LuceneEvents.Events.LUCENE_FILE_LOCK_DURATION, System.nanoTime() - lockStartTime); - boolean flushed = false; - try { - closed = true; // prevent lock stamp update - agilityContext.flush(); - flushed = true; - } finally { - closed = flushed; // allow close retry - closingContext = null; - } + if (agilityContext.isClosed()) { + // Here: this is considered to be a recovery path, may bypass closed context. + agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, + agilityContext.applyInRecoveryPath(fileLockFunc)); + } else { + // Here: this called during directory close to ensure cleared lock - + agilityContext.asyncToSync(LuceneEvents.Waits.WAIT_LUCENE_FILE_LOCK_CLEAR, + agilityContext.apply(fileLockFunc)); + } + agilityContext.recordEvent(LuceneEvents.Events.LUCENE_FILE_LOCK_DURATION, System.nanoTime() - lockStartTime); + boolean flushed = false; + try { + closed = true; // prevent lock stamp update + agilityContext.flush(); + flushed = true; + } finally { + closed = flushed; // allow close retry + closingContext = null; + clearingLockNow = false; } } From e81f7d28c368857ea92a496ed551245107a478fd Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Sat, 1 Nov 2025 04:41:14 -0400 Subject: [PATCH 3/4] Avoid spotbug issue --- .../directory/FDBDirectoryLockFactory.java | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java index 5884c6a437..6a302d2786 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java @@ -93,6 +93,7 @@ protected static class FDBDirectoryLock extends Lock { private FDBRecordContext closingContext = null; private final Object fileLockSetLock = new Object(); private boolean clearingLockNow = false; + private final Object clearingLockNowLock = new Object(); private FDBDirectoryLock(final AgilityContext agilityContext, final String lockName, byte[] fileLockKey, int timeWindowMilliseconds) { this.agilityContext = agilityContext; @@ -210,18 +211,13 @@ private void fileLockCheckNewLock(byte[] val, long nowMillis) { } } - private synchronized boolean inRecursiveClearLock() { - if (clearingLockNow) { - return true; - } - clearingLockNow = true; - return false; - } - private void fileLockClearFlushAndClose(boolean isRecovery) { - if (inRecursiveClearLock()) { + synchronized (clearingLockNowLock) { // Here: this function is being called from too many paths. Until cleanup, this guard is here to avoid recursions - return; + if (clearingLockNow) { + return; + } + clearingLockNow = true; } Function> fileLockFunc = aContext -> aContext.ensureActive().get(fileLockKey) @@ -257,7 +253,10 @@ private void fileLockClearFlushAndClose(boolean isRecovery) { } finally { closed = flushed; // allow close retry closingContext = null; - clearingLockNow = false; + synchronized (clearingLockNowLock) { + // clearing under lock to avoid spotbugsMain's "Inconsistent synchronization" issue. + clearingLockNow = false; + } } } From 4d5d5813ee3d3080417eee1ba687b2d1a0bf6082 Mon Sep 17 00:00:00 2001 From: Josef Ezra Date: Fri, 14 Nov 2025 16:24:48 -0500 Subject: [PATCH 4/4] (WIP) clear clearingLockNow in a whenComplete future stage --- .../directory/FDBDirectoryLockFactory.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java index 6a302d2786..05d09363e4 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryLockFactory.java @@ -212,8 +212,12 @@ private void fileLockCheckNewLock(byte[] val, long nowMillis) { } private void fileLockClearFlushAndClose(boolean isRecovery) { - synchronized (clearingLockNowLock) { + if (clearingLockNow) { // Here: this function is being called from too many paths. Until cleanup, this guard is here to avoid recursions + return; + } + synchronized (clearingLockNowLock) { + // repeat under lock if (clearingLockNow) { return; } @@ -233,6 +237,12 @@ private void fileLockClearFlushAndClose(boolean isRecovery) { throw new AlreadyClosedException("FileLock: Expected to be locked during close.This=" + this + " existingUuid=" + existingUuid); // The string append methods should handle null arguments. } } + }) + .whenComplete((res, err) -> { + synchronized (clearingLockNowLock) { + // clearing under lock to avoid spotbugsMain's "Inconsistent synchronization" issue. + clearingLockNow = false; + } }); if (agilityContext.isClosed()) { @@ -253,10 +263,6 @@ private void fileLockClearFlushAndClose(boolean isRecovery) { } finally { closed = flushed; // allow close retry closingContext = null; - synchronized (clearingLockNowLock) { - // clearing under lock to avoid spotbugsMain's "Inconsistent synchronization" issue. - clearingLockNow = false; - } } }