Skip to content

Commit bb93d20

Browse files
authored
Lucene: index filters support (#3688)
Add support for filtering index entries, either by index predicates or by an IndexMaintenanceFilter. However, Lucene indexes support only the ALL and NONE filter results; a filter that returns SOME causes an exception to be thrown. Resolves #3065
1 parent f032a91 commit bb93d20

File tree

7 files changed

+491
-48
lines changed

7 files changed

+491
-48
lines changed

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/indexes/StandardIndexMaintainer.java

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -359,49 +359,18 @@ public <M extends Message> List<IndexEntry> filteredIndexEntries(@Nullable final
359359
if (savedRecord == null) {
360360
return null;
361361
}
362-
// Apply both filters:
363-
// 1. Index predicates (if exist)
364-
// 2. IndexMaintenanceFilter
365-
// In the longer term, we will probably think about deprecating the index maintenance filter.
366-
final FDBStoreTimer timer = state.store.getTimer();
367-
final IndexPredicate predicate = state.index.getPredicate();
368-
if (predicate != null) {
369-
final long startTime = System.nanoTime();
370-
final boolean useMe = predicate.shouldIndexThisRecord(state.store, savedRecord);
371-
// Note: for now, IndexPredicate will not support filtering of certain index entries
372-
if (timer != null) {
373-
final FDBStoreTimer.Events event =
374-
useMe ?
375-
FDBStoreTimer.Events.USE_INDEX_RECORD_BY_PREDICATE :
376-
FDBStoreTimer.Events.SKIP_INDEX_RECORD_BY_PREDICATE;
377-
timer.recordSinceNanoTime(event, startTime);
378-
}
379-
if (!useMe) {
380-
// Here: index predicate filters out this record
381-
return null;
382-
}
383-
}
384-
final Message record = savedRecord.getRecord();
385-
long startTime = System.nanoTime();
386-
boolean filterIndexKeys = false;
387-
switch (state.filter.maintainIndex(state.index, record)) {
388-
case NONE:
389-
if (timer != null) {
390-
timer.recordSinceNanoTime(FDBStoreTimer.Events.SKIP_INDEX_RECORD, startTime);
391-
}
392-
return null;
393-
case SOME:
394-
filterIndexKeys = true;
395-
break;
396-
case ALL:
397-
default:
398-
break;
362+
final IndexMaintenanceFilter.IndexValues filterType = getFilterTypeForRecord(savedRecord);
363+
if (filterType == IndexMaintenanceFilter.IndexValues.NONE) {
364+
return null;
399365
}
400366
List<IndexEntry> indexEntries = evaluateIndex(savedRecord);
401-
if (!filterIndexKeys) {
367+
if (filterType == IndexMaintenanceFilter.IndexValues.ALL) {
402368
return indexEntries;
403369
}
370+
// Here: filterType is SOME. Check each index entry
371+
long startTime = System.nanoTime();
404372
int i = 0;
373+
final Message record = savedRecord.getRecord();
405374
while (i < indexEntries.size()) {
406375
if (state.filter.maintainIndexValue(state.index, record, indexEntries.get(i))) {
407376
i++;
@@ -418,6 +387,37 @@ public <M extends Message> List<IndexEntry> filteredIndexEntries(@Nullable final
418387
return indexEntries;
419388
}
420389

390+
protected <M extends Message> IndexMaintenanceFilter.IndexValues getFilterTypeForRecord(@Nonnull final FDBIndexableRecord<M> savedRecord) {
391+
// Apply both filters:
392+
// 1. Index predicates (if exist) - currently supports filtering out (i.e. NONE). If not filtered out, fallthrough to the next filter
393+
// 2. IndexMaintenanceFilter - supports ALL, NONE, and SOME
394+
// In the longer term, we will probably think about deprecating the index maintenance filter.
395+
final FDBStoreTimer timer = state.store.getTimer();
396+
final IndexPredicate predicate = state.index.getPredicate();
397+
if (predicate != null) {
398+
final long startTime = timer != null ? System.nanoTime() : 0L;
399+
final boolean useMe = predicate.shouldIndexThisRecord(state.store, savedRecord);
400+
// Note: for now, IndexPredicate will not support filtering of certain index entries
401+
if (timer != null) {
402+
final FDBStoreTimer.Events event =
403+
useMe ?
404+
FDBStoreTimer.Events.USE_INDEX_RECORD_BY_PREDICATE :
405+
FDBStoreTimer.Events.SKIP_INDEX_RECORD_BY_PREDICATE;
406+
timer.recordSinceNanoTime(event, startTime);
407+
}
408+
if (!useMe) {
409+
return IndexMaintenanceFilter.IndexValues.NONE;
410+
}
411+
}
412+
long startTime = System.nanoTime();
413+
IndexMaintenanceFilter.IndexValues ret = state.filter.maintainIndex(state.index, savedRecord.getRecord());
414+
if (ret == IndexMaintenanceFilter.IndexValues.NONE && timer != null) {
415+
// events are backward compatible
416+
timer.recordSinceNanoTime(FDBStoreTimer.Events.SKIP_INDEX_RECORD, startTime);
417+
}
418+
return ret;
419+
}
420+
421421
@Nonnull
422422
protected List<IndexEntry> commonKeys(@Nonnull List<IndexEntry> oldIndexEntries,
423423
@Nonnull List<IndexEntry> newIndexEntries) {

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import com.apple.foundationdb.record.provider.foundationdb.IndexDeferredMaintenanceControl;
5757
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainer;
5858
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
59+
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintenanceFilter;
5960
import com.apple.foundationdb.record.provider.foundationdb.IndexOperation;
6061
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
6162
import com.apple.foundationdb.record.provider.foundationdb.IndexScanBounds;
@@ -458,9 +459,12 @@ public <M extends Message> CompletableFuture<Void> update(@Nullable FDBIndexable
458459
}
459460

460461
@Nonnull
461-
<M extends Message> CompletableFuture<Void> update(@Nullable FDBIndexableRecord<M> oldRecord,
462-
@Nullable FDBIndexableRecord<M> newRecord,
462+
<M extends Message> CompletableFuture<Void> update(@Nullable FDBIndexableRecord<M> oldRecordUnfiltered,
463+
@Nullable FDBIndexableRecord<M> newRecordUnfiltered,
463464
@Nullable Integer destinationPartitionIdHint) {
465+
FDBIndexableRecord<M> oldRecord = maybeFilterRecord(oldRecordUnfiltered);
466+
FDBIndexableRecord<M> newRecord = maybeFilterRecord(newRecordUnfiltered);
467+
464468
LOG.trace("update oldRecord={}, newRecord={}", oldRecord, newRecord);
465469

466470
// Extract information for grouping from old and new records
@@ -507,6 +511,19 @@ private <M extends Message> CompletableFuture<Void> updateRecord(
507511
.thenAccept(partitionId -> writeDocument(newRecord, entry, partitionId)));
508512
}
509513

514+
@Nullable
515+
public <M extends Message> FDBIndexableRecord<M> maybeFilterRecord(FDBIndexableRecord<M> rec) {
516+
if (rec != null) {
517+
final IndexMaintenanceFilter.IndexValues filterType = getFilterTypeForRecord(rec);
518+
if (filterType == IndexMaintenanceFilter.IndexValues.NONE) {
519+
return null;
520+
} else if (filterType == IndexMaintenanceFilter.IndexValues.SOME) {
521+
throw new RecordCoreException("Lucene does not support this kind of filtering");
522+
}
523+
}
524+
return rec;
525+
}
526+
510527
/**
511528
* convenience wrapper that calls {@link #tryDelete(FDBIndexableRecord, Tuple)} only if the index is in
512529
* {@code WriteOnly} mode.
@@ -782,7 +799,7 @@ public IndexScrubbingTools<?> getIndexScrubbingTools(final IndexScrubbingTools.S
782799
final Map<String, String> options = state.index.getOptions();
783800
if (Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_ENABLED)) ||
784801
Boolean.parseBoolean(options.get(LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED))) {
785-
return new LuceneIndexScrubbingToolsMissing(partitioner, directoryManager);
802+
return new LuceneIndexScrubbingToolsMissing(partitioner, directoryManager, this);
786803
}
787804
return null;
788805
default:

fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingToolsMissing.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,13 @@ public class LuceneIndexScrubbingToolsMissing extends ValueIndexScrubbingToolsMi
6767
private final LucenePartitioner partitioner;
6868
@Nonnull
6969
private final FDBDirectoryManager directoryManager;
70+
@Nonnull
71+
private final LuceneIndexMaintainer indexMaintainer;
7072

71-
public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner, @Nonnull FDBDirectoryManager directoryManager) {
73+
public LuceneIndexScrubbingToolsMissing(@Nonnull LucenePartitioner partitioner, @Nonnull FDBDirectoryManager directoryManager, @Nonnull LuceneIndexMaintainer indexMaintainer) {
7274
this.partitioner = partitioner;
7375
this.directoryManager = directoryManager;
76+
this.indexMaintainer = indexMaintainer;
7477
}
7578

7679

@@ -100,7 +103,7 @@ public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final
100103
}
101104

102105
final FDBStoredRecord<Message> rec = result.get();
103-
if (rec == null || !recordTypes.contains(rec.getRecordType())) {
106+
if (!shouldHandleItem(rec)) {
104107
return CompletableFuture.completedFuture(null);
105108
}
106109

@@ -121,6 +124,13 @@ public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final
121124
});
122125
}
123126

127+
private boolean shouldHandleItem(FDBStoredRecord<Message> rec) {
128+
if (rec == null || !recordTypes.contains(rec.getRecordType())) {
129+
return false;
130+
}
131+
return indexMaintainer.maybeFilterRecord(rec) != null;
132+
}
133+
124134
@SuppressWarnings("PMD.CloseResource")
125135
private CompletableFuture<Pair<MissingIndexReason, Tuple>> detectMissingIndexKeys(final FDBRecordStore store, FDBStoredRecord<Message> rec) {
126136
// Generate synthetic record (if applicable) and return the first detected missing (if any).

fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexScrubbingTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
class LuceneIndexScrubbingTest extends FDBLuceneTestBase {
5454

5555
private TestingIndexMaintainerRegistry registry;
56-
private boolean flipBoolean = false;
5756

5857
@BeforeEach
5958
public void beforeEach() {

fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestDataModel.java

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@
2020

2121
package com.apple.foundationdb.record.lucene;
2222

23+
import com.apple.foundationdb.record.IndexEntry;
2324
import com.apple.foundationdb.record.RecordMetaData;
2425
import com.apple.foundationdb.record.RecordMetaDataBuilder;
26+
import com.apple.foundationdb.record.ScanProperties;
2527
import com.apple.foundationdb.record.TestRecordsGroupedParentChildProto;
2628
import com.apple.foundationdb.record.metadata.Index;
29+
import com.apple.foundationdb.record.metadata.IndexPredicate;
2730
import com.apple.foundationdb.record.metadata.JoinedRecordTypeBuilder;
2831
import com.apple.foundationdb.record.metadata.Key;
2932
import com.apple.foundationdb.record.metadata.expressions.KeyExpression;
@@ -32,6 +35,7 @@
3235
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
3336
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
3437
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
38+
import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexScrubber;
3539
import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer;
3640
import com.apple.foundationdb.record.provider.foundationdb.keyspace.KeySpacePath;
3741
import com.apple.foundationdb.record.test.TestKeySpace;
@@ -316,9 +320,10 @@ private LuceneIndexTestValidator getValidator(final Supplier<FDBRecordContext> o
316320

317321
@Nonnull
318322
static Index addIndex(final boolean isSynthetic, final KeyExpression rootExpression,
319-
final Map<String, String> options, final RecordMetaDataBuilder metaDataBuilder) {
323+
final Map<String, String> options, final RecordMetaDataBuilder metaDataBuilder,
324+
@Nullable IndexPredicate predicate) {
320325
Index index;
321-
index = new Index("joinNestedConcat", rootExpression, LuceneIndexTypes.LUCENE, options);
326+
index = new Index("joinNestedConcat", rootExpression, LuceneIndexTypes.LUCENE, options, predicate);
322327

323328
if (isSynthetic) {
324329
final JoinedRecordTypeBuilder joinBuilder = metaDataBuilder.addJoinedRecordType("JoinChildren");
@@ -399,6 +404,28 @@ public void explicitMergeIndex(final FDBRecordContext context, @Nullable FDBStor
399404
}
400405
}
401406

407+
public long findMissingIndexEntries(final FDBRecordContext context, @Nullable FDBStoreTimer timer) {
408+
FDBRecordStore recordStore = Objects.requireNonNull(schemaSetup.apply(context));
409+
try (OnlineIndexScrubber indexBuilder = OnlineIndexScrubber.newBuilder()
410+
.setRecordStore(recordStore)
411+
.setIndex(index)
412+
.setTimer(timer)
413+
.build()) {
414+
return indexBuilder.scrubMissingIndexEntries();
415+
}
416+
}
417+
418+
public List<IndexEntry> findAllRecordsByQuery(final FDBRecordContext context, int group) {
419+
LuceneQueryClause search = LuceneQuerySearchClause.MATCH_ALL_DOCS_QUERY;
420+
421+
FDBRecordStore store = Objects.requireNonNull(schemaSetup.apply(context));
422+
LuceneScanBounds scanBounds = isGrouped
423+
? LuceneIndexTestValidator.groupedSortedTextSearch(store, index, search, null, group)
424+
: LuceneIndexTestUtils.fullTextSearch(store, index, search, false);
425+
return store.scanIndex(index, scanBounds, null, ScanProperties.FORWARD_SCAN)
426+
.asList().join();
427+
}
428+
402429
public Random getRandom() {
403430
return random;
404431
}
@@ -429,6 +456,8 @@ static class Builder {
429456
private Index index;
430457
@Nullable
431458
private RecordMetaData metadata;
459+
@Nullable
460+
IndexPredicate predicate = null;
432461

433462
public Builder(final long seed, StoreBuilderSupplier storeBuilderSupplier,
434463
TestKeySpacePathManagerExtension pathManager) {
@@ -461,6 +490,12 @@ public Builder setPartitionHighWatermark(final int partitionHighWatermark) {
461490
return this;
462491
}
463492

493+
public Builder setPredicate(@Nullable final IndexPredicate predicate) {
494+
this.predicate = predicate;
495+
metadata = null;
496+
return this;
497+
}
498+
464499
public Builder setTextGeneratorWithNewRandom(final RandomTextGenerator textGenerator) {
465500
this.textGenerator = textGenerator.withNewRandom(random);
466501
return this;
@@ -484,7 +519,7 @@ public LuceneIndexTestDataModel build() {
484519
final Map<String, String> options = getOptions();
485520
final RecordMetaDataBuilder metaDataBuilder = LuceneIndexTestDataModel.createBaseMetaDataBuilder();
486521
final KeyExpression rootExpression = LuceneIndexTestDataModel.createRootExpression(isGrouped, isSynthetic);
487-
this.index = LuceneIndexTestDataModel.addIndex(isSynthetic, rootExpression, options, metaDataBuilder);
522+
this.index = LuceneIndexTestDataModel.addIndex(isSynthetic, rootExpression, options, metaDataBuilder, predicate);
488523
this.metadata = metaDataBuilder.build();
489524
}
490525
final Function<FDBRecordContext, FDBRecordStore> schemaSetup = context -> {

fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneOnlineIndexingTest.java

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainer;
4747
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerFactory;
4848
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
49+
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintenanceFilter;
4950
import com.apple.foundationdb.record.provider.foundationdb.OnlineIndexer;
5051
import com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils;
5152
import com.apple.foundationdb.record.provider.foundationdb.properties.RecordLayerPropertyStorage;
@@ -55,6 +56,7 @@
5556
import com.apple.foundationdb.record.util.pair.Pair;
5657
import com.apple.foundationdb.subspace.Subspace;
5758
import com.apple.foundationdb.tuple.Tuple;
59+
import com.apple.test.BooleanSource;
5860
import com.apple.test.RandomSeedSource;
5961
import com.apple.test.RandomizedTestUtils;
6062
import com.google.auto.service.AutoService;
@@ -106,6 +108,7 @@
106108
import static com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.MAP_DOC;
107109
import static com.apple.foundationdb.record.provider.foundationdb.indexes.TextIndexTestUtils.SIMPLE_DOC;
108110
import static org.junit.jupiter.api.Assertions.assertEquals;
111+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
109112
import static org.junit.jupiter.api.Assertions.assertNotNull;
110113
import static org.junit.jupiter.api.Assertions.assertNull;
111114
import static org.junit.jupiter.api.Assertions.assertSame;
@@ -633,7 +636,6 @@ void luceneOnlineIndexingTestMulti() throws IOException {
633636
}
634637
}
635638

636-
637639
protected void openRecordStore(FDBRecordContext context, FDBRecordStoreTestBase.RecordMetaDataHook hook) {
638640
RecordMetaDataBuilder metaDataBuilder = RecordMetaData.newBuilder().setRecords(TestRecordsTextProto.getDescriptor());
639641
metaDataBuilder.getRecordType(COMPLEX_DOC).setPrimaryKey(concatenateFields("group", "doc_id"));
@@ -644,6 +646,23 @@ protected void openRecordStore(FDBRecordContext context, FDBRecordStoreTestBase.
644646
setupPlanner(null);
645647
}
646648

649+
protected void openRecordStoreWithFilter(FDBRecordContext context, FDBRecordStoreTestBase.RecordMetaDataHook hook, boolean filterOut) {
650+
RecordMetaDataBuilder metaDataBuilder = RecordMetaData.newBuilder().setRecords(TestRecordsTextProto.getDescriptor());
651+
metaDataBuilder.getRecordType(COMPLEX_DOC).setPrimaryKey(concatenateFields("group", "doc_id"));
652+
hook.apply(metaDataBuilder);
653+
final FDBRecordStore.Builder builder = getStoreBuilder(context, metaDataBuilder.getRecordMetaData())
654+
.setSerializer(TextIndexTestUtils.COMPRESSING_SERIALIZER);
655+
if (filterOut) {
656+
recordStore = builder
657+
.setIndexMaintenanceFilter((i, r) -> IndexMaintenanceFilter.IndexValues.NONE)
658+
.createOrOpen();
659+
} else {
660+
recordStore = builder
661+
.createOrOpen();
662+
}
663+
setupPlanner(null);
664+
}
665+
647666
@ParameterizedTest
648667
@ValueSource(ints = {1, 2, 3})
649668
void luceneOnlineIndexingTestGroupingKeys(int groupingCount) {
@@ -760,6 +779,43 @@ void luceneOnlineIndexingTestGroupingKeysBackgroundMerge(int groupingCount) thro
760779
assertTrue(newLength < oldLength);
761780
}
762781

782+
@ParameterizedTest
783+
@BooleanSource
784+
void luceneOnlineIndexingTestNoMergeIfFilteredOutRecords(boolean filterOut) throws IOException {
785+
Index index = new Index(
786+
"Map_with_auto_complete$entry-value",
787+
new GroupingKeyExpression(field("entry",
788+
KeyExpression.FanType.FanOut).nest(concat(LuceneIndexTestUtils.keys)), 3),
789+
LuceneIndexTypes.LUCENE,
790+
ImmutableMap.of());
791+
792+
RecordMetaDataHook hook = metaDataBuilder -> {
793+
metaDataBuilder.removeIndex(TextIndexTestUtils.SIMPLE_DEFAULT_NAME);
794+
TextIndexTestUtils.addRecordTypePrefix(metaDataBuilder);
795+
metaDataBuilder.addIndex(MAP_DOC, index);
796+
};
797+
int group = 3;
798+
799+
// write/overwrite records
800+
boolean needMerge = false;
801+
for (int iLast = 60; iLast > 40; iLast --) {
802+
try (FDBRecordContext context = openContext()) {
803+
openRecordStoreWithFilter(context, hook, filterOut);
804+
for (int i = 0; i < iLast; i++) {
805+
recordStore.saveRecord(multiEntryMapDoc(77L * i, ENGINEER_JOKE + iLast, group));
806+
}
807+
final Set<Index> indexSet = recordStore.getIndexDeferredMaintenanceControl().getMergeRequiredIndexes();
808+
if (indexSet != null && !indexSet.isEmpty()) {
809+
assertEquals(1, indexSet.size());
810+
assertEquals(indexSet.stream().findFirst().get().getName(), index.getName());
811+
needMerge = true;
812+
}
813+
commit(context);
814+
}
815+
}
816+
assertNotEquals(needMerge, filterOut);
817+
}
818+
763819
private TestRecordsTextProto.MapDocument multiEntryMapDoc(long id, String text, int group) {
764820
assertTrue(group < 4);
765821
String text2 = "Text 2, and " + (id % 2);

0 commit comments

Comments
 (0)