From 39c92b0d4a3fd6fd884fe02cba747823ac625521 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Wed, 19 Nov 2025 21:23:49 -0800 Subject: [PATCH 1/8] Pointrange query optimizations Signed-off-by: Prudhvi Godithi --- .../apache/lucene/search/PointRangeQuery.java | 476 +++++++++++------- .../java/org/apache/lucene/search/Weight.java | 24 + 2 files changed, 332 insertions(+), 168 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index c198fecb4b35..3f12b075ff57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.BiFunction; import java.util.function.Predicate; import org.apache.lucene.document.IntPoint; @@ -32,6 +33,7 @@ import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; +import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; @@ -59,16 +61,6 @@ public abstract class PointRangeQuery extends Query { final byte[] upperPoint; final ByteArrayComparator comparator; - /** - * Expert: create a multidimensional range query for point values. - * - * @param field field name. must not be {@code null}. - * @param lowerPoint lower portion of the range (inclusive). - * @param upperPoint upper portion of the range (inclusive). - * @param numDims number of dimensions. - * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != - * upperValue.length} - */ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) { checkArgs(field, lowerPoint, upperPoint); this.field = field; @@ -83,10 +75,10 @@ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, in } if (lowerPoint.length != upperPoint.length) { throw new IllegalArgumentException( - "lowerPoint has length=" - + lowerPoint.length - + " but upperPoint has different length=" - + upperPoint.length); + "lowerPoint has length=" + + lowerPoint.length + + " but upperPoint has different length=" + + upperPoint.length); } this.numDims = numDims; this.bytesPerDim = lowerPoint.length / numDims; @@ -97,12 +89,6 @@ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, in this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim); } - /** - * Check preconditions for all factory methods - * - * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are - * null. - */ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) { if (field == null) { throw new IllegalArgumentException("field must not be null"); @@ -124,22 +110,23 @@ public void visit(QueryVisitor visitor) { @Override public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { - - // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". 
- // This is an inverted structure and should be used in the first pass: + throws IOException { return new ConstantScoreWeight(this, boost) { + // Cache to share DocIdSet computation across partitions of the same segment + // Key: LeafReaderContext (identifies the segment) + // Value: Lazily-initialized DocIdSet for the entire segment + private final ConcurrentHashMap + segmentCache = new ConcurrentHashMap<>(); + private boolean matches(byte[] packedValue) { int offset = 0; for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) { - // Doc's value is too low, in this dimension return false; } if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) { - // Doc's value is too high, in this dimension return false; } } @@ -192,7 +179,6 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { }; } - /** Create a visitor that sets documents that do NOT match the range. */ private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) { return new IntersectVisitor() { @@ -235,10 +221,8 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { Relation relation = relate(minPackedValue, maxPackedValue); switch (relation) { case CELL_INSIDE_QUERY: - // all points match, skip this subtree return Relation.CELL_OUTSIDE_QUERY; case CELL_OUTSIDE_QUERY: - // none of the points match, clear all documents return Relation.CELL_INSIDE_QUERY; case CELL_CROSSES_QUERY: default: @@ -248,9 +232,77 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { }; } + /** + * Helper class that lazily builds and caches a DocIdSet for an entire segment. + * This allows multiple partitions of the same segment to share the BKD traversal work. + */ + private class SegmentDocIdSetSupplier { + private final LeafReaderContext context; + private final PointValues values; + private volatile DocIdSet cachedDocIdSet = null; + private final Object buildLock = new Object(); + + SegmentDocIdSetSupplier(LeafReaderContext context, PointValues values) { + this.context = context; + this.values = values; + } + + /** + * Get or build the DocIdSet for the entire segment. + * Thread-safe: first thread builds, others wait and reuse. 
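+       * <p>For example (a hypothetical call sequence; the two calls may come from different
+       * partition threads):
+       *
+       * <pre>{@code
+       * DocIdSet first = supplier.getOrBuild();  // builds and caches the segment-wide set
+       * DocIdSet second = supplier.getOrBuild(); // returns the same cached instance
+       * assert first == second;
+       * }</pre>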
+ */ + DocIdSet getOrBuild() throws IOException { + DocIdSet result = cachedDocIdSet; + if (result == null) { + synchronized (buildLock) { + result = cachedDocIdSet; + if (result == null) { + result = buildDocIdSet(); + cachedDocIdSet = result; + } + } + } + return result; + } + + private DocIdSet buildDocIdSet() throws IOException { + LeafReader reader = context.reader(); + + // Check if we should use inverse intersection optimization + if (values.getDocCount() == reader.maxDoc() + && values.getDocCount() == values.size() + && estimateCost() > reader.maxDoc() / 2) { + + // Build inverse bitset (docs that DON'T match) + final FixedBitSet result = new FixedBitSet(reader.maxDoc()); + long[] cost = new long[1]; + values.intersect(getInverseIntersectVisitor(result, cost)); + + // Flip to get docs that DO match + result.flip(0, reader.maxDoc()); + cost[0] = Math.max(0, reader.maxDoc() - cost[0]); + + return new BitDocIdSet(result, cost[0]); + } else { + // Normal path: build DocIdSet from matching docs + DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), values); + IntersectVisitor visitor = getIntersectVisitor(builder); + values.intersect(visitor); + return builder.build(); + } + } + + private long estimateCost() throws IOException { + DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), values); + IntersectVisitor visitor = getIntersectVisitor(builder); + return values.estimateDocCount(visitor); + } + } + @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - LeafReader reader = context.reader(); + public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) + throws IOException { + LeafReader reader = partition.ctx.reader(); PointValues values = reader.getPointValues(field); if (checkValidPointValues(values) == false) { @@ -265,11 +317,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { - // If this query is a required clause of a boolean query, then returning null here - // will help make sure that we don't call ScorerSupplier#get on other required clauses - // of the same boolean query, which is an expensive operation for some queries (e.g. - // multi-term queries). 
+ || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { return null; } } @@ -283,7 +331,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { + || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { allDocsMatch = false; break; } @@ -293,49 +341,183 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti } if (allDocsMatch) { - // all docs have a value and all points are within bounds, so everything matches return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); } else { - return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) { + // Get or create the cached supplier for this segment + SegmentDocIdSetSupplier segmentSupplier = segmentCache.computeIfAbsent( + partition.ctx, + ctx -> new SegmentDocIdSetSupplier(ctx, values) + ); + + return new PartitionScorerSupplier( + segmentSupplier, + partition.minDocId, + partition.maxDocId, + score(), + scoreMode + ); + } + } - final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values); - final IntersectVisitor visitor = getIntersectVisitor(result); - long cost = -1; + /** + * ScorerSupplier for a partition that filters results from the shared segment DocIdSet. + */ + private class PartitionScorerSupplier extends ScorerSupplier { + private final SegmentDocIdSetSupplier segmentSupplier; + private final int minDocId; + private final int maxDocId; + private final float score; + private final ScoreMode scoreMode; + + PartitionScorerSupplier( + SegmentDocIdSetSupplier segmentSupplier, + int minDocId, + int maxDocId, + float score, + ScoreMode scoreMode) { + this.segmentSupplier = segmentSupplier; + this.minDocId = minDocId; + this.maxDocId = maxDocId; + this.score = score; + this.scoreMode = scoreMode; + } - @Override - public DocIdSetIterator iterator(long leadCost) throws IOException { - if (values.getDocCount() == reader.maxDoc() - && values.getDocCount() == values.size() - && cost() > reader.maxDoc() / 2) { - // If all docs have exactly one value and the cost is greater - // than half the leaf size then maybe we can make things faster - // by computing the set of documents that do NOT match the range - final FixedBitSet result = new FixedBitSet(reader.maxDoc()); - long[] cost = new long[1]; - values.intersect(getInverseIntersectVisitor(result, cost)); - // Flip the bit set and cost - result.flip(0, reader.maxDoc()); - cost[0] = Math.max(0, reader.maxDoc() - cost[0]); - return new BitSetIterator(result, cost[0]); - } + @Override + public Scorer get(long leadCost) throws IOException { + DocIdSetIterator iterator = getIterator(); + if (iterator == null) { + return null; + } + return new ConstantScoreScorer(score, scoreMode, iterator); + } - values.intersect(visitor); - return result.build().iterator(); - } + private DocIdSetIterator getIterator() throws IOException { + // Get the shared DocIdSet (built once per segment) + DocIdSet docIdSet = segmentSupplier.getOrBuild(); + DocIdSetIterator fullIterator = docIdSet.iterator(); - @Override - public long cost() { - if (cost == -1) { - // Computing the cost may be expensive, so only do it if necessary - cost = values.estimateDocCount(visitor); - assert cost >= 0; - } - return cost; + if (fullIterator == null) { + return null; + } + + // 
Check if this is a full segment (no partition filtering needed) + boolean isFullSegment = + (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); + + if (isFullSegment) { + return fullIterator; + } + + // Wrap iterator to filter to partition range + return new PartitionFilteredDocIdSetIterator(fullIterator, minDocId, maxDocId); + } + + @Override + public long cost() { + try { + DocIdSet docIdSet = segmentSupplier.getOrBuild(); + long totalCost = docIdSet.iterator().cost(); + + // Estimate cost for this partition proportionally + boolean isFullSegment = + (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); + + if (isFullSegment) { + return totalCost; } - }; + + // Proportional estimate based on partition size + int segmentSize = segmentSupplier.context.reader().maxDoc(); + int partitionSize = maxDocId - minDocId; + return (totalCost * partitionSize) / segmentSize; + } catch (IOException e) { + // If we can't get the cost, return a conservative estimate + return maxDocId - minDocId; + } + } + + @Override + public BulkScorer bulkScorer() throws IOException { + Scorer scorer = get(Long.MAX_VALUE); + if (scorer == null) { + return null; + } + return new Weight.DefaultBulkScorer(scorer); } } + /** + * Iterator that filters another iterator to only return docs within a partition range. + * Reading from a FixedBitSet is thread-safe (just reading from long[]), so multiple + * partitions can read from the same underlying DocIdSet concurrently. + */ + private static class PartitionFilteredDocIdSetIterator extends DocIdSetIterator { + private final DocIdSetIterator delegate; + private final int minDocId; + private final int maxDocId; + private int doc = -1; + + PartitionFilteredDocIdSetIterator( + DocIdSetIterator delegate, + int minDocId, + int maxDocId) { + this.delegate = delegate; + this.minDocId = minDocId; + this.maxDocId = maxDocId; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + if (doc == -1) { + // First call: advance to minDocId + doc = delegate.advance(minDocId); + } else { + doc = delegate.nextDoc(); + } + + // Stop if we've exceeded the partition range + if (doc >= maxDocId) { + doc = NO_MORE_DOCS; + } + + return doc; + } + + @Override + public int advance(int target) throws IOException { + if (target >= maxDocId) { + return doc = NO_MORE_DOCS; + } + + // Ensure target is at least minDocId + target = Math.max(target, minDocId); + doc = delegate.advance(target); + + if (doc >= maxDocId) { + doc = NO_MORE_DOCS; + } + + return doc; + } + + @Override + public long cost() { + // Conservative estimate based on partition size + return Math.min(delegate.cost(), maxDocId - minDocId); + } + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + return scorerSupplier( + IndexSearcher.LeafReaderContextPartition.createForEntireSegment(context)); + } + @Override public int count(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); @@ -347,96 +529,68 @@ public int count(LeafReaderContext context) throws IOException { if (reader.hasDeletions() == false) { if (relate(values.getMinPackedValue(), values.getMaxPackedValue()) - == Relation.CELL_INSIDE_QUERY) { + == Relation.CELL_INSIDE_QUERY) { return values.getDocCount(); } - // only 1D: we have the guarantee that it will actually run fast since there are at most 2 - // crossing leaves. 
- // docCount == size : counting according number of points in leaf node, so must be - // single-valued. if (numDims == 1 && values.getDocCount() == values.size()) { return (int) - pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); + pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); } } return super.count(context); } - /** - * Finds the number of points matching the provided range conditions. Using this method is - * faster than calling {@link PointValues#intersect(IntersectVisitor)} to get the count of - * intersecting points. This method does not enforce live documents, therefore it should only - * be used when there are no deleted documents. - * - * @param pointTree start node of the count operation - * @param nodeComparator comparator to be used for checking whether the internal node is - * inside the range - * @param leafComparator comparator to be used for checking whether the leaf node is inside - * the range - * @return count of points that match the range - */ private long pointCount( - PointValues.PointTree pointTree, - BiFunction nodeComparator, - Predicate leafComparator) - throws IOException { + PointValues.PointTree pointTree, + BiFunction nodeComparator, + Predicate leafComparator) + throws IOException { final long[] matchingNodeCount = {0}; - // create a custom IntersectVisitor that records the number of leafNodes that matched final IntersectVisitor visitor = - new IntersectVisitor() { - @Override - public void visit(int docID) { - // this branch should be unreachable - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " - + "docID=" - + docID - + " node being visited"); - } - - @Override - public void visit(int docID, byte[] packedValue) { - if (leafComparator.test(packedValue)) { - matchingNodeCount[0]++; - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return nodeComparator.apply(minPackedValue, maxPackedValue); - } - }; + new IntersectVisitor() { + @Override + public void visit(int docID) { + throw new UnsupportedOperationException( + "This IntersectVisitor does not perform any actions on a " + + "docID=" + + docID + + " node being visited"); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (leafComparator.test(packedValue)) { + matchingNodeCount[0]++; + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return nodeComparator.apply(minPackedValue, maxPackedValue); + } + }; pointCount(visitor, pointTree, matchingNodeCount); return matchingNodeCount[0]; } private void pointCount( - IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) - throws IOException { + IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) + throws IOException { Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { case CELL_OUTSIDE_QUERY: - // This cell is fully outside the query shape: return 0 as the count of its nodes return; case CELL_INSIDE_QUERY: - // This cell is fully inside the query shape: return the size of the entire node as the - // count matchingNodeCount[0] += pointTree.size(); return; case CELL_CROSSES_QUERY: - /* - The cell crosses the shape boundary, or the cell fully contains the query, so we fall - through and do full counting. 
- */ if (pointTree.moveToChild()) { do { pointCount(visitor, pointTree, matchingNodeCount); } while (pointTree.moveToSibling()); pointTree.moveToParent(); } else { - // we have reached a leaf node here. pointTree.visitDocValues(visitor); - // leaf node count is saved in the matchingNodeCount array by the visitor } return; default: @@ -489,10 +643,10 @@ public final boolean equals(Object o) { private boolean equalsTo(PointRangeQuery other) { return Objects.equals(field, other.field) - && numDims == other.numDims - && bytesPerDim == other.bytesPerDim - && Arrays.equals(lowerPoint, other.lowerPoint) - && Arrays.equals(upperPoint, other.upperPoint); + && numDims == other.numDims + && bytesPerDim == other.bytesPerDim + && Arrays.equals(lowerPoint, other.lowerPoint) + && Arrays.equals(upperPoint, other.upperPoint); } @Override @@ -503,7 +657,6 @@ public final String toString(String field) { sb.append(':'); } - // print ourselves as "range per dimension" for (int i = 0; i < numDims; i++) { if (i > 0) { sb.append(','); @@ -513,26 +666,18 @@ public final String toString(String field) { sb.append('['); sb.append( - toString( - i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim))); + toString( + i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim))); sb.append(" TO "); sb.append( - toString( - i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim))); + toString( + i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim))); sb.append(']'); } return sb.toString(); } - /** - * Returns a string of a single value in a human-readable format for debugging. This is used by - * {@link #toString()}. - * - * @param dimension dimension of the particular value - * @param value single value, never null - * @return human readable value for debugging - */ protected abstract String toString(int dimension, byte[] value); @Override @@ -543,7 +688,6 @@ public Query rewrite(IndexSearcher searcher) throws IOException { checkValidPointValues(leaf.reader().getPointValues(field)); } - // fetch the global min/max packed values across all segments byte[] globalMinPacked = PointValues.getMinPackedValue(reader, getField()); byte[] globalMaxPacked = PointValues.getMaxPackedValue(reader, getField()); @@ -584,10 +728,9 @@ private boolean canRewriteToFieldExistsQuery(IndexReader reader) { FieldInfo info = leaf.reader().getFieldInfos().fieldInfo(field); if (info != null - && info.getDocValuesType() == DocValuesType.NONE - && !info.hasNorms() - && info.getVectorDimension() == 0) { - // Can't use a FieldExistsQuery on this segment, so return false + && info.getDocValuesType() == DocValuesType.NONE + && !info.hasNorms() + && info.getVectorDimension() == 0) { return false; } } @@ -602,16 +745,14 @@ private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0 - || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { + || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { return Relation.CELL_OUTSIDE_QUERY; } - // Evaluate crosses only when false. 
Still need to iterate through - // all the dimensions to ensure, none of them is completely outside if (crosses == false) { crosses = - comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 - || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; + comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 + || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; } } @@ -624,27 +765,26 @@ private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { private boolean checkValidPointValues(PointValues values) throws IOException { if (values == null) { - // No docs in this segment/field indexed any points return false; } if (values.getNumIndexDimensions() != numDims) { throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with numIndexDimensions=" - + values.getNumIndexDimensions() - + " but this query has numDims=" - + numDims); + "field=\"" + + field + + "\" was indexed with numIndexDimensions=" + + values.getNumIndexDimensions() + + " but this query has numDims=" + + numDims); } if (bytesPerDim != values.getBytesPerDimension()) { throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with bytesPerDim=" - + values.getBytesPerDimension() - + " but this query has bytesPerDim=" - + bytesPerDim); + "field=\"" + + field + + "\" was indexed with bytesPerDim=" + + values.getBytesPerDimension() + + " but this query has bytesPerDim=" + + bytesPerDim); } return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 341dd3cadf6a..ddd03ba6e73c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -149,6 +149,30 @@ public final Scorer scorer(LeafReaderContext context) throws IOException { */ public abstract ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException; + /** + * Returns a {@link ScorerSupplier}, which can then be used to get a {@link Scorer} for a + * partition of a leaf reader context. + * + *

<p>This method allows queries to optimize for intra-segment concurrency by knowing the specific
+   * doc ID range being searched within the segment. The default implementation delegates to {@link
+   * #scorerSupplier(LeafReaderContext)}, ignoring the partition bounds. Queries that can benefit
+   * from partition awareness (e.g., by creating smaller data structures scoped to the partition)
+   * should override this method.
+   *
+   * <p>
A scorer supplier for the same {@link LeafReaderContext} instance may be requested multiple + * times as part of a single search call, potentially from different threads searching different + * doc ID ranges concurrently. + * + * @param partition the leaf reader context partition containing the context and doc ID range + * @return a {@link ScorerSupplier} providing the scorer, or null if scorer is null + * @throws IOException if an IOException occurs + * @see IndexSearcher.LeafReaderContextPartition + * @since 10.1 + */ + public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) throws IOException { + return scorerSupplier(partition.ctx); + } + /** * Helper method that delegates to {@link #scorerSupplier(LeafReaderContext)}. It is implemented * as From 6bc66eca018c11385e338aa3e0e41970bb2f26a4 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Wed, 19 Nov 2025 21:25:13 -0800 Subject: [PATCH 2/8] Pointrange query optimizations Signed-off-by: Prudhvi Godithi --- .../apache/lucene/search/PointRangeQuery.java | 186 ++++++++---------- .../java/org/apache/lucene/search/Weight.java | 3 +- 2 files changed, 88 insertions(+), 101 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 3f12b075ff57..2b63ef7e208c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -34,7 +34,6 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; import org.apache.lucene.util.BitDocIdSet; -import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IntsRef; @@ -75,10 +74,10 @@ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, in } if (lowerPoint.length != upperPoint.length) { throw new IllegalArgumentException( - "lowerPoint has length=" - + lowerPoint.length - + " but upperPoint has different length=" - + upperPoint.length); + "lowerPoint has length=" + + lowerPoint.length + + " but upperPoint has different length=" + + upperPoint.length); } this.numDims = numDims; this.bytesPerDim = lowerPoint.length / numDims; @@ -110,15 +109,15 @@ public void visit(QueryVisitor visitor) { @Override public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) - throws IOException { + throws IOException { return new ConstantScoreWeight(this, boost) { // Cache to share DocIdSet computation across partitions of the same segment // Key: LeafReaderContext (identifies the segment) // Value: Lazily-initialized DocIdSet for the entire segment - private final ConcurrentHashMap - segmentCache = new ConcurrentHashMap<>(); + private final ConcurrentHashMap segmentCache = + new ConcurrentHashMap<>(); private boolean matches(byte[] packedValue) { int offset = 0; @@ -233,8 +232,8 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { } /** - * Helper class that lazily builds and caches a DocIdSet for an entire segment. - * This allows multiple partitions of the same segment to share the BKD traversal work. + * Helper class that lazily builds and caches a DocIdSet for an entire segment. This allows + * multiple partitions of the same segment to share the BKD traversal work. 
*/ private class SegmentDocIdSetSupplier { private final LeafReaderContext context; @@ -248,8 +247,8 @@ private class SegmentDocIdSetSupplier { } /** - * Get or build the DocIdSet for the entire segment. - * Thread-safe: first thread builds, others wait and reuse. + * Get or build the DocIdSet for the entire segment. Thread-safe: first thread builds, + * others wait and reuse. */ DocIdSet getOrBuild() throws IOException { DocIdSet result = cachedDocIdSet; @@ -270,8 +269,8 @@ private DocIdSet buildDocIdSet() throws IOException { // Check if we should use inverse intersection optimization if (values.getDocCount() == reader.maxDoc() - && values.getDocCount() == values.size() - && estimateCost() > reader.maxDoc() / 2) { + && values.getDocCount() == values.size() + && estimateCost() > reader.maxDoc() / 2) { // Build inverse bitset (docs that DON'T match) final FixedBitSet result = new FixedBitSet(reader.maxDoc()); @@ -301,7 +300,7 @@ private long estimateCost() throws IOException { @Override public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) - throws IOException { + throws IOException { LeafReader reader = partition.ctx.reader(); PointValues values = reader.getPointValues(field); @@ -317,7 +316,7 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { + || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { return null; } } @@ -331,7 +330,7 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0 - || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { + || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { allDocsMatch = false; break; } @@ -344,24 +343,16 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); } else { // Get or create the cached supplier for this segment - SegmentDocIdSetSupplier segmentSupplier = segmentCache.computeIfAbsent( - partition.ctx, - ctx -> new SegmentDocIdSetSupplier(ctx, values) - ); + SegmentDocIdSetSupplier segmentSupplier = + segmentCache.computeIfAbsent( + partition.ctx, ctx -> new SegmentDocIdSetSupplier(ctx, values)); return new PartitionScorerSupplier( - segmentSupplier, - partition.minDocId, - partition.maxDocId, - score(), - scoreMode - ); + segmentSupplier, partition.minDocId, partition.maxDocId, score(), scoreMode); } } - /** - * ScorerSupplier for a partition that filters results from the shared segment DocIdSet. - */ + /** ScorerSupplier for a partition that filters results from the shared segment DocIdSet. 
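Cost estimates are scaled to the partition's proportional share of the segment's documents.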
*/ private class PartitionScorerSupplier extends ScorerSupplier { private final SegmentDocIdSetSupplier segmentSupplier; private final int minDocId; @@ -370,11 +361,11 @@ private class PartitionScorerSupplier extends ScorerSupplier { private final ScoreMode scoreMode; PartitionScorerSupplier( - SegmentDocIdSetSupplier segmentSupplier, - int minDocId, - int maxDocId, - float score, - ScoreMode scoreMode) { + SegmentDocIdSetSupplier segmentSupplier, + int minDocId, + int maxDocId, + float score, + ScoreMode scoreMode) { this.segmentSupplier = segmentSupplier; this.minDocId = minDocId; this.maxDocId = maxDocId; @@ -401,8 +392,7 @@ private DocIdSetIterator getIterator() throws IOException { } // Check if this is a full segment (no partition filtering needed) - boolean isFullSegment = - (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); + boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); if (isFullSegment) { return fullIterator; @@ -419,8 +409,7 @@ public long cost() { long totalCost = docIdSet.iterator().cost(); // Estimate cost for this partition proportionally - boolean isFullSegment = - (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); + boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS); if (isFullSegment) { return totalCost; @@ -457,10 +446,7 @@ private static class PartitionFilteredDocIdSetIterator extends DocIdSetIterator private final int maxDocId; private int doc = -1; - PartitionFilteredDocIdSetIterator( - DocIdSetIterator delegate, - int minDocId, - int maxDocId) { + PartitionFilteredDocIdSetIterator(DocIdSetIterator delegate, int minDocId, int maxDocId) { this.delegate = delegate; this.minDocId = minDocId; this.maxDocId = maxDocId; @@ -515,7 +501,7 @@ public long cost() { @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { return scorerSupplier( - IndexSearcher.LeafReaderContextPartition.createForEntireSegment(context)); + IndexSearcher.LeafReaderContextPartition.createForEntireSegment(context)); } @Override @@ -529,53 +515,53 @@ public int count(LeafReaderContext context) throws IOException { if (reader.hasDeletions() == false) { if (relate(values.getMinPackedValue(), values.getMaxPackedValue()) - == Relation.CELL_INSIDE_QUERY) { + == Relation.CELL_INSIDE_QUERY) { return values.getDocCount(); } if (numDims == 1 && values.getDocCount() == values.size()) { return (int) - pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); + pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); } } return super.count(context); } private long pointCount( - PointValues.PointTree pointTree, - BiFunction nodeComparator, - Predicate leafComparator) - throws IOException { + PointValues.PointTree pointTree, + BiFunction nodeComparator, + Predicate leafComparator) + throws IOException { final long[] matchingNodeCount = {0}; final IntersectVisitor visitor = - new IntersectVisitor() { - @Override - public void visit(int docID) { - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " - + "docID=" - + docID - + " node being visited"); - } - - @Override - public void visit(int docID, byte[] packedValue) { - if (leafComparator.test(packedValue)) { - matchingNodeCount[0]++; - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return nodeComparator.apply(minPackedValue, maxPackedValue); - } - }; + new IntersectVisitor() { + @Override + 
public void visit(int docID) { + throw new UnsupportedOperationException( + "This IntersectVisitor does not perform any actions on a " + + "docID=" + + docID + + " node being visited"); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (leafComparator.test(packedValue)) { + matchingNodeCount[0]++; + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return nodeComparator.apply(minPackedValue, maxPackedValue); + } + }; pointCount(visitor, pointTree, matchingNodeCount); return matchingNodeCount[0]; } private void pointCount( - IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) - throws IOException { + IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) + throws IOException { Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { case CELL_OUTSIDE_QUERY: @@ -643,10 +629,10 @@ public final boolean equals(Object o) { private boolean equalsTo(PointRangeQuery other) { return Objects.equals(field, other.field) - && numDims == other.numDims - && bytesPerDim == other.bytesPerDim - && Arrays.equals(lowerPoint, other.lowerPoint) - && Arrays.equals(upperPoint, other.upperPoint); + && numDims == other.numDims + && bytesPerDim == other.bytesPerDim + && Arrays.equals(lowerPoint, other.lowerPoint) + && Arrays.equals(upperPoint, other.upperPoint); } @Override @@ -666,12 +652,12 @@ public final String toString(String field) { sb.append('['); sb.append( - toString( - i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim))); + toString( + i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim))); sb.append(" TO "); sb.append( - toString( - i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim))); + toString( + i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim))); sb.append(']'); } @@ -728,9 +714,9 @@ private boolean canRewriteToFieldExistsQuery(IndexReader reader) { FieldInfo info = leaf.reader().getFieldInfos().fieldInfo(field); if (info != null - && info.getDocValuesType() == DocValuesType.NONE - && !info.hasNorms() - && info.getVectorDimension() == 0) { + && info.getDocValuesType() == DocValuesType.NONE + && !info.hasNorms() + && info.getVectorDimension() == 0) { return false; } } @@ -745,14 +731,14 @@ private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0 - || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { + || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { return Relation.CELL_OUTSIDE_QUERY; } if (crosses == false) { crosses = - comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 - || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; + comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 + || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; } } @@ -770,21 +756,21 @@ private boolean checkValidPointValues(PointValues values) throws IOException { if (values.getNumIndexDimensions() != numDims) { throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with numIndexDimensions=" - + values.getNumIndexDimensions() - + " but this query has numDims=" - + numDims); + "field=\"" + + field + + "\" was indexed with numIndexDimensions=" + + 
values.getNumIndexDimensions() + + " but this query has numDims=" + + numDims); } if (bytesPerDim != values.getBytesPerDimension()) { throw new IllegalArgumentException( - "field=\"" - + field - + "\" was indexed with bytesPerDim=" - + values.getBytesPerDimension() - + " but this query has bytesPerDim=" - + bytesPerDim); + "field=\"" + + field + + "\" was indexed with bytesPerDim=" + + values.getBytesPerDimension() + + " but this query has bytesPerDim=" + + bytesPerDim); } return true; } diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index ddd03ba6e73c..f1cce197d7b4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -169,7 +169,8 @@ public final Scorer scorer(LeafReaderContext context) throws IOException { * @see IndexSearcher.LeafReaderContextPartition * @since 10.1 */ - public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) throws IOException { + public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) + throws IOException { return scorerSupplier(partition.ctx); } From fc12d4eaf9865340b7bb4ccd589c958c0b8dde08 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Wed, 19 Nov 2025 21:32:12 -0800 Subject: [PATCH 3/8] Pointrange query optimizations Signed-off-by: Prudhvi Godithi --- .../src/java/org/apache/lucene/search/IndexSearcher.java | 9 ++++++++- .../java/org/apache/lucene/search/PointRangeQuery.java | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index d1079b69089a..aa6c62c7edef 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -828,7 +828,14 @@ protected void searchLeaf( // continue with the following leaf return; } - ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx); + ScorerSupplier scorerSupplier; + if (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS) { + scorerSupplier = weight.scorerSupplier(ctx); + } else { + LeafReaderContextPartition partition = + LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId); + scorerSupplier = weight.scorerSupplier(partition); + } if (scorerSupplier != null) { scorerSupplier.setTopLevelScoringClause(); BulkScorer scorer = scorerSupplier.bulkScorer(); diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 2b63ef7e208c..607de799b321 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -664,6 +664,14 @@ public final String toString(String field) { return sb.toString(); } + /** + * Returns a string of a single value in a human-readable format for debugging. This is used by + * {@link #toString()}. 
+ * + * @param dimension dimension of the particular value + * @param value single value, never null + * @return human readable value for debugging + */ protected abstract String toString(int dimension, byte[] value); @Override From 14c8442b3c685a14b27ef44bcb8694c5f7b24fe7 Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Sun, 23 Nov 2025 10:43:10 -0800 Subject: [PATCH 4/8] Pointrange query optimizations Signed-off-by: Prudhvi Godithi --- .../apache/lucene/search/IndexSearcher.java | 2 +- .../apache/lucene/search/PointRangeQuery.java | 100 +++++++++++++++++- 2 files changed, 98 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index aa6c62c7edef..0e5a82a87ec4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -321,7 +321,7 @@ public QueryCachingPolicy getQueryCachingPolicy() { * href="https://github.com/apache/lucene/issues/13745">the corresponding github issue. */ protected LeafSlice[] slices(List leaves) { - return slices(leaves, MAX_DOCS_PER_SLICE, MAX_SEGMENTS_PER_SLICE, false); + return slices(leaves, MAX_DOCS_PER_SLICE, MAX_SEGMENTS_PER_SLICE, true); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 607de799b321..924125ec26dd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -60,6 +60,16 @@ public abstract class PointRangeQuery extends Query { final byte[] upperPoint; final ByteArrayComparator comparator; + /** + * Expert: create a multidimensional range query for point values. + * + * @param field field name. must not be {@code null}. + * @param lowerPoint lower portion of the range (inclusive). + * @param upperPoint upper portion of the range (inclusive). + * @param numDims number of dimensions. + * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != + * upperValue.length} + */ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) { checkArgs(field, lowerPoint, upperPoint); this.field = field; @@ -88,6 +98,12 @@ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, in this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim); } + /** + * Check preconditions for all factory methods + * + * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are + * null. + */ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) { if (field == null) { throw new IllegalArgumentException("field must not be null"); @@ -111,6 +127,9 @@ public void visit(QueryVisitor visitor) { public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". 
+ // This is an inverted structure and should be used in the first pass: + return new ConstantScoreWeight(this, boost) { // Cache to share DocIdSet computation across partitions of the same segment @@ -123,9 +142,11 @@ private boolean matches(byte[] packedValue) { int offset = 0; for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) { + // Doc's value is too low, in this dimension return false; } if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) { + // Doc's value is too high, in this dimension return false; } } @@ -178,6 +199,7 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { }; } + /** Create a visitor that sets documents that do NOT match the range. */ private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) { return new IntersectVisitor() { @@ -240,10 +262,13 @@ private class SegmentDocIdSetSupplier { private final PointValues values; private volatile DocIdSet cachedDocIdSet = null; private final Object buildLock = new Object(); + private final long instanceId = System.nanoTime(); SegmentDocIdSetSupplier(LeafReaderContext context, PointValues values) { this.context = context; this.values = values; + System.err.println("[SUPPLIER_CREATED] SegmentDocIdSetSupplier #" + instanceId + + " for segment " + context.ord); } /** @@ -251,15 +276,29 @@ private class SegmentDocIdSetSupplier { * others wait and reuse. */ DocIdSet getOrBuild() throws IOException { + System.err.println("[GET_OR_BUILD] Called on supplier #" + instanceId + + " for segment " + context.ord + + " on thread " + Thread.currentThread().getId()); DocIdSet result = cachedDocIdSet; if (result == null) { + System.err.println("[BUILD_CHECK] cachedDocIdSet is null, entering synchronized block"); synchronized (buildLock) { result = cachedDocIdSet; if (result == null) { + System.err.println("[BUILD_START] Building DocIdSet for segment " + context.ord + + " on thread " + Thread.currentThread().getId()); + long start = System.nanoTime(); result = buildDocIdSet(); + long elapsed = (System.nanoTime() - start) / 1_000_000; + System.err.println("[BUILD_COMPLETE] Built DocIdSet for segment " + context.ord + + " in " + elapsed + "ms"); cachedDocIdSet = result; + } else { + System.err.println("[BUILD_SKIP] Another thread already built DocIdSet"); } } + } else { + System.err.println("[CACHE_HIT] Reusing cached DocIdSet for segment " + context.ord); } return result; } @@ -301,6 +340,13 @@ private long estimateCost() throws IOException { @Override public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) throws IOException { + + System.err.println("[SCORER_SUPPLIER] Called for segment " + partition.ctx.ord + + " partition [" + partition.minDocId + ", " + partition.maxDocId + ") " + + "on thread " + Thread.currentThread().getId() + + " ctx identity: " + System.identityHashCode(partition.ctx)); + + LeafReader reader = partition.ctx.reader(); PointValues values = reader.getPointValues(field); @@ -317,6 +363,10 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { + // If this query is a required clause of a boolean query, then returning null here + // will help make sure that we don't call ScorerSupplier#get on other required clauses + // of the same boolean query, 
which is an expensive operation for some queries (e.g. + // multi-term queries). return null; } } @@ -340,12 +390,22 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa } if (allDocsMatch) { + // all docs have a value and all points are within bounds, so everything matches return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); } else { + System.err.println("[CACHE_LOOKUP] Before computeIfAbsent, cache size: " + segmentCache.size()); // Get or create the cached supplier for this segment - SegmentDocIdSetSupplier segmentSupplier = - segmentCache.computeIfAbsent( - partition.ctx, ctx -> new SegmentDocIdSetSupplier(ctx, values)); + SegmentDocIdSetSupplier segmentSupplier = segmentCache.computeIfAbsent( + partition.ctx, + ctx -> { + System.err.println("[CACHE_MISS] CREATING new SegmentDocIdSetSupplier for segment " + ctx.ord + + " on thread " + Thread.currentThread().getId()); + return new SegmentDocIdSetSupplier(ctx, values); + } + ); + + System.err.println("[CACHE_RESULT] After computeIfAbsent, cache size: " + segmentCache.size() + + ", supplier identity: " + System.identityHashCode(segmentSupplier)); return new PartitionScorerSupplier( segmentSupplier, partition.minDocId, partition.maxDocId, score(), scoreMode); @@ -518,6 +578,10 @@ public int count(LeafReaderContext context) throws IOException { == Relation.CELL_INSIDE_QUERY) { return values.getDocCount(); } + // only 1D: we have the guarantee that it will actually run fast since there are at most 2 + // crossing leaves. + // docCount == size : counting according number of points in leaf node, so must be + // single-valued. if (numDims == 1 && values.getDocCount() == values.size()) { return (int) pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches); @@ -526,16 +590,31 @@ public int count(LeafReaderContext context) throws IOException { return super.count(context); } + /** + * Finds the number of points matching the provided range conditions. Using this method is + * faster than calling {@link PointValues#intersect(IntersectVisitor)} to get the count of + * intersecting points. This method does not enforce live documents, therefore it should only + * be used when there are no deleted documents. 
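+     * <p>A sketch of how {@code count(LeafReaderContext)} above invokes it (single dimension,
+     * single-valued field, no deletions):
+     *
+     * <pre>{@code
+     * if (numDims == 1 && values.getDocCount() == values.size()) {
+     *   return (int) pointCount(values.getPointTree(), PointRangeQuery.this::relate, this::matches);
+     * }
+     * }</pre>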
+ * + * @param pointTree start node of the count operation + * @param nodeComparator comparator to be used for checking whether the internal node is + * inside the range + * @param leafComparator comparator to be used for checking whether the leaf node is inside + * the range + * @return count of points that match the range + */ private long pointCount( PointValues.PointTree pointTree, BiFunction nodeComparator, Predicate leafComparator) throws IOException { final long[] matchingNodeCount = {0}; + // create a custom IntersectVisitor that records the number of leafNodes that matched final IntersectVisitor visitor = new IntersectVisitor() { @Override public void visit(int docID) { + // this branch should be unreachable throw new UnsupportedOperationException( "This IntersectVisitor does not perform any actions on a " + "docID=" @@ -565,18 +644,27 @@ private void pointCount( Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { case CELL_OUTSIDE_QUERY: + // This cell is fully outside the query shape: return 0 as the count of its nodes return; case CELL_INSIDE_QUERY: + // This cell is fully inside the query shape: return the size of the entire node as the + // count matchingNodeCount[0] += pointTree.size(); return; case CELL_CROSSES_QUERY: + /* + The cell crosses the shape boundary, or the cell fully contains the query, so we fall + through and do full counting. + */ if (pointTree.moveToChild()) { do { pointCount(visitor, pointTree, matchingNodeCount); } while (pointTree.moveToSibling()); pointTree.moveToParent(); } else { + // we have reached a leaf node here. pointTree.visitDocValues(visitor); + // leaf node count is saved in the matchingNodeCount array by the visitor } return; default: @@ -643,6 +731,7 @@ public final String toString(String field) { sb.append(':'); } + // print ourselves as "range per dimension" for (int i = 0; i < numDims; i++) { if (i > 0) { sb.append(','); @@ -682,6 +771,7 @@ public Query rewrite(IndexSearcher searcher) throws IOException { checkValidPointValues(leaf.reader().getPointValues(field)); } + // fetch the global min/max packed values across all segments byte[] globalMinPacked = PointValues.getMinPackedValue(reader, getField()); byte[] globalMaxPacked = PointValues.getMaxPackedValue(reader, getField()); @@ -725,6 +815,7 @@ private boolean canRewriteToFieldExistsQuery(IndexReader reader) { && info.getDocValuesType() == DocValuesType.NONE && !info.hasNorms() && info.getVectorDimension() == 0) { + // Can't use a FieldExistsQuery on this segment, so return false return false; } } @@ -743,6 +834,8 @@ private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { return Relation.CELL_OUTSIDE_QUERY; } + // Evaluate crosses only when false. 
Still need to iterate through + // all the dimensions to ensure, none of them is completely outside if (crosses == false) { crosses = comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 @@ -759,6 +852,7 @@ private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { private boolean checkValidPointValues(PointValues values) throws IOException { if (values == null) { + // No docs in this segment/field indexed any points return false; } From af7cde79d0b814d79e821bde450b3aa268b1a7ec Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Sun, 23 Nov 2025 11:53:10 -0800 Subject: [PATCH 5/8] Pointrange query optimizations Signed-off-by: Prudhvi Godithi --- .../apache/lucene/search/PointRangeQuery.java | 124 ++++++++++++------ 1 file changed, 86 insertions(+), 38 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 924125ec26dd..869a2307a85d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -242,8 +242,10 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { Relation relation = relate(minPackedValue, maxPackedValue); switch (relation) { case CELL_INSIDE_QUERY: + // all points match, skip this subtree return Relation.CELL_OUTSIDE_QUERY; case CELL_OUTSIDE_QUERY: + // none of the points match, clear all documents return Relation.CELL_INSIDE_QUERY; case CELL_CROSSES_QUERY: default: @@ -257,18 +259,21 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { * Helper class that lazily builds and caches a DocIdSet for an entire segment. This allows * multiple partitions of the same segment to share the BKD traversal work. */ - private class SegmentDocIdSetSupplier { + final class SegmentDocIdSetSupplier { private final LeafReaderContext context; - private final PointValues values; private volatile DocIdSet cachedDocIdSet = null; private final Object buildLock = new Object(); + private final long instanceId = System.nanoTime(); - SegmentDocIdSetSupplier(LeafReaderContext context, PointValues values) { + SegmentDocIdSetSupplier(LeafReaderContext context) { this.context = context; - this.values = values; - System.err.println("[SUPPLIER_CREATED] SegmentDocIdSetSupplier #" + instanceId + - " for segment " + context.ord); + + System.err.println( + "[SUPPLIER_CREATED] SegmentDocIdSetSupplier #" + + instanceId + + " for segment " + + context.ord); } /** @@ -276,40 +281,64 @@ private class SegmentDocIdSetSupplier { * others wait and reuse. 
*/ DocIdSet getOrBuild() throws IOException { - System.err.println("[GET_OR_BUILD] Called on supplier #" + instanceId + - " for segment " + context.ord + - " on thread " + Thread.currentThread().getId()); + + System.err.println( + "[GET_OR_BUILD] Called on supplier #" + + instanceId + + " for segment " + + context.ord + + " on thread " + + Thread.currentThread().getId()); + DocIdSet result = cachedDocIdSet; if (result == null) { + System.err.println("[BUILD_CHECK] cachedDocIdSet is null, entering synchronized block"); + synchronized (buildLock) { result = cachedDocIdSet; if (result == null) { - System.err.println("[BUILD_START] Building DocIdSet for segment " + context.ord + - " on thread " + Thread.currentThread().getId()); + + System.err.println( + "[BUILD_START] Building DocIdSet for segment " + + context.ord + + " on thread " + + Thread.currentThread().getId()); + long start = System.nanoTime(); + result = buildDocIdSet(); + long elapsed = (System.nanoTime() - start) / 1_000_000; - System.err.println("[BUILD_COMPLETE] Built DocIdSet for segment " + context.ord + - " in " + elapsed + "ms"); + + System.err.println( + "[BUILD_COMPLETE] Built DocIdSet for segment " + + context.ord + + " in " + + elapsed + + "ms"); + cachedDocIdSet = result; } else { + System.err.println("[BUILD_SKIP] Another thread already built DocIdSet"); } } } else { + System.err.println("[CACHE_HIT] Reusing cached DocIdSet for segment " + context.ord); } return result; } private DocIdSet buildDocIdSet() throws IOException { + PointValues values = context.reader().getPointValues(field); LeafReader reader = context.reader(); // Check if we should use inverse intersection optimization if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() - && estimateCost() > reader.maxDoc() / 2) { + && estimateCost(values) > reader.maxDoc() / 2) { // Build inverse bitset (docs that DON'T match) final FixedBitSet result = new FixedBitSet(reader.maxDoc()); @@ -330,7 +359,7 @@ && estimateCost() > reader.maxDoc() / 2) { } } - private long estimateCost() throws IOException { + private long estimateCost(PointValues values) throws IOException { DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), values); IntersectVisitor visitor = getIntersectVisitor(builder); return values.estimateDocCount(visitor); @@ -341,11 +370,18 @@ private long estimateCost() throws IOException { public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition) throws IOException { - System.err.println("[SCORER_SUPPLIER] Called for segment " + partition.ctx.ord + - " partition [" + partition.minDocId + ", " + partition.maxDocId + ") " + - "on thread " + Thread.currentThread().getId() + - " ctx identity: " + System.identityHashCode(partition.ctx)); - + System.err.println( + "[SCORER_SUPPLIER] Called for segment " + + partition.ctx.ord + + " partition [" + + partition.minDocId + + ", " + + partition.maxDocId + + ") " + + "on thread " + + Thread.currentThread().getId() + + " ctx identity: " + + System.identityHashCode(partition.ctx)); LeafReader reader = partition.ctx.reader(); @@ -393,27 +429,41 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa // all docs have a value and all points are within bounds, so everything matches return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); } else { - System.err.println("[CACHE_LOOKUP] Before computeIfAbsent, cache size: " + segmentCache.size()); - // Get or create the cached supplier for this segment 
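          // ConcurrentHashMap#computeIfAbsent invokes the mapping function at most once per key,
          // so concurrent partition threads all receive the same supplier instance for a segment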
@@ -393,27 +429,41 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa
         // all docs have a value and all points are within bounds, so everything matches
         return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc());
       } else {
-        System.err.println("[CACHE_LOOKUP] Before computeIfAbsent, cache size: " + segmentCache.size());
-        // Get or create the cached supplier for this segment
-        SegmentDocIdSetSupplier segmentSupplier = segmentCache.computeIfAbsent(
-            partition.ctx,
-            ctx -> {
-              System.err.println("[CACHE_MISS] CREATING new SegmentDocIdSetSupplier for segment " + ctx.ord +
-                  " on thread " + Thread.currentThread().getId());
-              return new SegmentDocIdSetSupplier(ctx, values);
-            }
-        );
-        System.err.println("[CACHE_RESULT] After computeIfAbsent, cache size: " + segmentCache.size() +
-            ", supplier identity: " + System.identityHashCode(segmentSupplier));
+        System.err.println(
+            "[CACHE_LOOKUP] Before computeIfAbsent, cache size: " + segmentCache.size());
+        // Get or create the cached supplier for this segment
+        SegmentDocIdSetSupplier segmentSupplier =
+            segmentCache.computeIfAbsent(
+                partition.ctx,
+                ctx -> {
+
+                  System.err.println(
+                      "[CACHE_MISS] CREATING new SegmentDocIdSetSupplier for segment "
+                          + ctx.ord
+                          + " on thread "
+                          + Thread.currentThread().getId());
+
+                  return new SegmentDocIdSetSupplier(ctx);
+                });
+
+        System.err.println(
+            "[CACHE_RESULT] After computeIfAbsent, cache size: "
+                + segmentCache.size()
+                + ", supplier identity: "
+                + System.identityHashCode(segmentSupplier));
+
+        // Each call creates a new PartitionScorerSupplier and all partitions share the same
+        // SegmentDocIdSetSupplier
+        // ConstantScoreScorerSupplier is designed for immediate execution:
         return new PartitionScorerSupplier(
             segmentSupplier, partition.minDocId, partition.maxDocId, score(), scoreMode);
       }
     }

     /** ScorerSupplier for a partition that filters results from the shared segment DocIdSet. */
-    private class PartitionScorerSupplier extends ScorerSupplier {
+    final class PartitionScorerSupplier extends ScorerSupplier {
       private final SegmentDocIdSetSupplier segmentSupplier;
       private final int minDocId;
       private final int maxDocId;
@@ -468,20 +518,18 @@ public long cost() {
       DocIdSet docIdSet = segmentSupplier.getOrBuild();
       long totalCost = docIdSet.iterator().cost();

-      // Estimate cost for this partition proportionally
       boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS);

       if (isFullSegment) {
         return totalCost;
       }

-      // Proportional estimate based on partition size
       int segmentSize = segmentSupplier.context.reader().maxDoc();
       int partitionSize = maxDocId - minDocId;
       return (totalCost * partitionSize) / segmentSize;
     } catch (IOException e) {
-      // If we can't get the cost, return a conservative estimate
-      return maxDocId - minDocId;
+      // Wrap in RuntimeException since cost() doesn't throw checked exceptions
+      throw new RuntimeException("Failed to compute cost", e);
     }
   }
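The computeIfAbsent call being introduced above is what lets every partition of a segment share one SegmentDocIdSetSupplier. A small self-contained sketch of that per-key caching behavior, with an invented SegmentKey record standing in for LeafReaderContext:

    import java.util.concurrent.ConcurrentHashMap;

    final class PerSegmentCacheDemo {
      record SegmentKey(int ord) {} // hypothetical stand-in for LeafReaderContext

      private static final ConcurrentHashMap<SegmentKey, Object> CACHE = new ConcurrentHashMap<>();

      static Object supplierFor(SegmentKey key) {
        // The mapping function runs at most once per key, so concurrent lookups from
        // different partitions of the same segment all get one shared instance.
        return CACHE.computeIfAbsent(key, k -> new Object());
      }

      public static void main(String[] args) {
        SegmentKey segment = new SegmentKey(0);
        System.out.println(supplierFor(segment) == supplierFor(segment)); // prints: true
      }
    }

ConcurrentHashMap can block other updates to the same bin while the mapping function runs, so that function should stay cheap; the patch respects this by creating only a lightweight supplier there and deferring the expensive BKD traversal to getOrBuild().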
@@ -500,7 +548,7 @@ public BulkScorer bulkScorer() throws IOException {
      * Reading from a FixedBitSet is thread-safe (just reading from long[]), so multiple
      * partitions can read from the same underlying DocIdSet concurrently.
      */
-    private static class PartitionFilteredDocIdSetIterator extends DocIdSetIterator {
+    static final class PartitionFilteredDocIdSetIterator extends DocIdSetIterator {
       private final DocIdSetIterator delegate;
       private final int minDocId;
       private final int maxDocId;

From b1cefc199cfa096f43effd6c70fd93e204a3ab69 Mon Sep 17 00:00:00 2001
From: Prudhvi Godithi
Date: Sun, 23 Nov 2025 17:08:16 -0800
Subject: [PATCH 6/8] Pointrange query optimizations

Signed-off-by: Prudhvi Godithi
---
 .../apache/lucene/search/FilterWeight.java    |   6 +
 .../apache/lucene/search/PointRangeQuery.java | 127 ++----------------
 .../search/TestQueryProfilerWeight.java       |   2 +-
 3 files changed, 20 insertions(+), 115 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
index 16bb75ef4062..efbd9badd8a2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java
@@ -67,4 +67,10 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
   public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
     return in.scorerSupplier(context);
   }
+
+  @Override
+  public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
+      throws IOException {
+    return in.scorerSupplier(partition);
+  }
 }
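The FilterWeight hunk above guards against a general hazard of delegating wrappers: when the wrapped type later grows an overload with a default implementation, the wrapper still compiles but silently stops forwarding that call. A sketch of the failure mode with invented Scoring and Delegating types (not Lucene's API):

    interface Scoring {
      long cost();

      // Newer overload with a default: pre-existing wrappers compile unchanged,
      // but they no longer forward this call unless they override it explicitly.
      default long cost(int partition) {
        return cost();
      }
    }

    final class Delegating implements Scoring {
      private final Scoring in;

      Delegating(Scoring in) {
        this.in = in;
      }

      @Override
      public long cost() {
        return in.cost();
      }

      @Override
      public long cost(int partition) {
        return in.cost(partition); // without this, in's specialization would be bypassed
      }
    }

The added scorerSupplier(partition) override plays the role of the second method here: it keeps the wrapped Weight's partition-aware behavior reachable through the filter.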
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
index 869a2307a85d..29c0b5ae60e5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -264,16 +264,8 @@ final class SegmentDocIdSetSupplier {
     private volatile DocIdSet cachedDocIdSet = null;
     private final Object buildLock = new Object();

-    private final long instanceId = System.nanoTime();
-
     SegmentDocIdSetSupplier(LeafReaderContext context) {
       this.context = context;
-
-      System.err.println(
-          "[SUPPLIER_CREATED] SegmentDocIdSetSupplier #"
-              + instanceId
-              + " for segment "
-              + context.ord);
     }

     /**
@@ -281,52 +273,15 @@ final class SegmentDocIdSetSupplier {
      * Get or build the DocIdSet for the entire segment. Thread-safe: first thread builds,
      * others wait and reuse.
      */
     DocIdSet getOrBuild() throws IOException {
-
-      System.err.println(
-          "[GET_OR_BUILD] Called on supplier #"
-              + instanceId
-              + " for segment "
-              + context.ord
-              + " on thread "
-              + Thread.currentThread().getId());
-
       DocIdSet result = cachedDocIdSet;
       if (result == null) {
-
-        System.err.println("[BUILD_CHECK] cachedDocIdSet is null, entering synchronized block");
-
         synchronized (buildLock) {
           result = cachedDocIdSet;
           if (result == null) {
-
-            System.err.println(
-                "[BUILD_START] Building DocIdSet for segment "
-                    + context.ord
-                    + " on thread "
-                    + Thread.currentThread().getId());
-
-            long start = System.nanoTime();
-
             result = buildDocIdSet();
-
-            long elapsed = (System.nanoTime() - start) / 1_000_000;
-
-            System.err.println(
-                "[BUILD_COMPLETE] Built DocIdSet for segment "
-                    + context.ord
-                    + " in "
-                    + elapsed
-                    + "ms");
-
            cachedDocIdSet = result;
-          } else {
-
-            System.err.println("[BUILD_SKIP] Another thread already built DocIdSet");
           }
         }
-      } else {
-
-        System.err.println("[CACHE_HIT] Reusing cached DocIdSet for segment " + context.ord);
       }
       return result;
     }
@@ -334,21 +289,17 @@ DocIdSet getOrBuild() throws IOException {
     private DocIdSet buildDocIdSet() throws IOException {
       PointValues values = context.reader().getPointValues(field);
       LeafReader reader = context.reader();
-
       // Check if we should use inverse intersection optimization
       if (values.getDocCount() == reader.maxDoc()
           && values.getDocCount() == values.size()
           && estimateCost(values) > reader.maxDoc() / 2) {
-
         // Build inverse bitset (docs that DON'T match)
         final FixedBitSet result = new FixedBitSet(reader.maxDoc());
         long[] cost = new long[1];
         values.intersect(getInverseIntersectVisitor(result, cost));
-
         // Flip to get docs that DO match
         result.flip(0, reader.maxDoc());
         cost[0] = Math.max(0, reader.maxDoc() - cost[0]);
-
         return new BitDocIdSet(result, cost[0]);
       } else {
         // Normal path: build DocIdSet from matching docs
@@ -369,27 +320,11 @@ private long estimateCost(PointValues values) throws IOException {
     @Override
     public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition partition)
         throws IOException {
-
-      System.err.println(
-          "[SCORER_SUPPLIER] Called for segment "
-              + partition.ctx.ord
-              + " partition ["
-              + partition.minDocId
-              + ", "
-              + partition.maxDocId
-              + ") "
-              + "on thread "
-              + Thread.currentThread().getId()
-              + " ctx identity: "
-              + System.identityHashCode(partition.ctx));
-
       LeafReader reader = partition.ctx.reader();
-
       PointValues values = reader.getPointValues(field);
       if (checkValidPointValues(values) == false) {
         return null;
       }
-
       if (values.getDocCount() == 0) {
         return null;
       } else {
@@ -407,7 +342,6 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa
           }
         }
       }
-
       boolean allDocsMatch;
       if (values.getDocCount() == reader.maxDoc()) {
         final byte[] fieldPackedLower = values.getMinPackedValue();
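The buildDocIdSet() in the hunk above keeps the inverse-intersection trick from the first patch: when more than half the segment is expected to match, visit the complement and flip the bitset once. A toy illustration of the idea, using java.util.BitSet in place of FixedBitSet and a hardcoded non-matching set in place of the BKD visit:

    import java.util.BitSet;

    final class InverseCollectDemo {
      public static void main(String[] args) {
        int maxDoc = 8;
        int[] nonMatching = {2, 5}; // pretend the range rejects only these two docs

        // Mark the docs that do NOT match; here that is the cheaper set to visit.
        BitSet result = new BitSet(maxDoc);
        for (int doc : nonMatching) {
          result.set(doc);
        }

        // One flip turns the complement into the answer.
        result.flip(0, maxDoc);
        System.out.println(result); // prints: {0, 1, 3, 4, 6, 7}

        // Mirrors cost[0] = Math.max(0, reader.maxDoc() - cost[0]) in the patch.
        long cost = Math.max(0, maxDoc - nonMatching.length);
        System.out.println(cost); // prints: 6
      }
    }

The guards in the surrounding if are what make the flip sound: it is only correct when every document has exactly one point value (getDocCount() == maxDoc() and getDocCount() == size()), since otherwise documents without the field would be flipped into the match set.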
@@ -424,39 +358,16 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa
       } else {
         allDocsMatch = false;
       }
-
       if (allDocsMatch) {
         // all docs have a value and all points are within bounds, so everything matches
         return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc());
       } else {
-
-        System.err.println(
-            "[CACHE_LOOKUP] Before computeIfAbsent, cache size: " + segmentCache.size());
-
         // Get or create the cached supplier for this segment
         SegmentDocIdSetSupplier segmentSupplier =
             segmentCache.computeIfAbsent(
                 partition.ctx,
-                ctx -> {
-
-                  System.err.println(
-                      "[CACHE_MISS] CREATING new SegmentDocIdSetSupplier for segment "
-                          + ctx.ord
-                          + " on thread "
-                          + Thread.currentThread().getId());
-
-                  return new SegmentDocIdSetSupplier(ctx);
-                });
-
-        System.err.println(
-            "[CACHE_RESULT] After computeIfAbsent, cache size: "
-                + segmentCache.size()
-                + ", supplier identity: "
-                + System.identityHashCode(segmentSupplier));
-
-        // Each call creates a new PartitionScorerSupplier and all partitions share the same
-        // SegmentDocIdSetSupplier
-        // ConstantScoreScorerSupplier is designed for immediate execution:
+                ctx -> new SegmentDocIdSetSupplier(ctx));
+        // Each call creates a new PartitionScorerSupplier and all partitions share the same SegmentDocIdSetSupplier
         return new PartitionScorerSupplier(
             segmentSupplier, partition.minDocId, partition.maxDocId, score(), scoreMode);
       }
@@ -496,41 +407,34 @@ private DocIdSetIterator getIterator() throws IOException {
       // Get the shared DocIdSet (built once per segment)
       DocIdSet docIdSet = segmentSupplier.getOrBuild();
       DocIdSetIterator fullIterator = docIdSet.iterator();
-
       if (fullIterator == null) {
         return null;
       }
-
       // Check if this is a full segment (no partition filtering needed)
       boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS);
-
       if (isFullSegment) {
         return fullIterator;
       }
-
       // Wrap iterator to filter to partition range
       return new PartitionFilteredDocIdSetIterator(fullIterator, minDocId, maxDocId);
     }

     @Override
     public long cost() {
+      DocIdSet docIdSet;
       try {
-        DocIdSet docIdSet = segmentSupplier.getOrBuild();
-        long totalCost = docIdSet.iterator().cost();
-
-        boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS);
-
-        if (isFullSegment) {
-          return totalCost;
-        }
-
-        int segmentSize = segmentSupplier.context.reader().maxDoc();
-        int partitionSize = maxDocId - minDocId;
-        return (totalCost * partitionSize) / segmentSize;
+        docIdSet = segmentSupplier.getOrBuild();
       } catch (IOException e) {
-        // Wrap in RuntimeException since cost() doesn't throw checked exceptions
-        throw new RuntimeException("Failed to compute cost", e);
+        throw new RuntimeException(e);
+      }
+      long totalCost = docIdSet.iterator().cost();
+      boolean isFullSegment = (minDocId == 0 && maxDocId == DocIdSetIterator.NO_MORE_DOCS);
+      if (isFullSegment) {
+        return totalCost;
       }
+      int segmentSize = segmentSupplier.context.reader().maxDoc();
+      int partitionSize = maxDocId - minDocId;
+      return (totalCost * partitionSize) / segmentSize;
     }

     @Override
@@ -573,12 +477,10 @@ public int nextDoc() throws IOException {
       } else {
         doc = delegate.nextDoc();
       }
-
       // Stop if we've exceeded the partition range
       if (doc >= maxDocId) {
         doc = NO_MORE_DOCS;
       }
-
       return doc;
     }

@@ -587,15 +489,12 @@ public int advance(int target) throws IOException {
       if (target >= maxDocId) {
         return doc = NO_MORE_DOCS;
       }
-
       // Ensure target is at least minDocId
       target = Math.max(target, minDocId);
       doc = delegate.advance(target);
-
       if (doc >= maxDocId) {
         doc = NO_MORE_DOCS;
       }
-
       return doc;
     }
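The rewritten cost() above makes two choices worth calling out: the IOException is wrapped in a RuntimeException because ScorerSupplier.cost() declares no checked exceptions, and the partition estimate assumes matches are spread uniformly, scaling the segment total by the fraction of doc ids the partition covers. The arithmetic in isolation:

    final class PartitionCostDemo {
      // Mirrors PartitionScorerSupplier.cost(): a partition's share of the total
      // cost is taken to be proportional to the share of doc ids it covers.
      static long partitionCost(long totalCost, int segmentMaxDoc, int minDocId, int maxDocId) {
        int partitionSize = maxDocId - minDocId;
        return (totalCost * partitionSize) / segmentMaxDoc;
      }

      public static void main(String[] args) {
        // 10_000 matches in a 100_000-doc segment; partition covers [25_000, 50_000).
        System.out.println(partitionCost(10_000, 100_000, 25_000, 50_000)); // prints: 2500
      }
    }

For skewed data the uniform assumption can be off, but cost() is a planning hint rather than an exact count, so a proportional guess is a reasonable trade against re-running the BKD estimate once per partition.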
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestQueryProfilerWeight.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestQueryProfilerWeight.java
index 41dc054a0756..5bb3191804a3 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestQueryProfilerWeight.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestQueryProfilerWeight.java
@@ -177,7 +177,7 @@ public void testPropagateTopLevelScoringClause() throws IOException {
     Weight fakeWeight = new FakeWeight(query);
     QueryProfilerBreakdown profile = new QueryProfilerBreakdown();
     QueryProfilerWeight profileWeight = new QueryProfilerWeight(fakeWeight, profile);
-    ScorerSupplier scorerSupplier = profileWeight.scorerSupplier(null);
+    ScorerSupplier scorerSupplier = profileWeight.scorerSupplier((LeafReaderContext) null);
     scorerSupplier.setTopLevelScoringClause();
     assertEquals(42, scorerSupplier.cost());
   }

From ff1e95411b819b415acd8e343de33a474510c9d0 Mon Sep 17 00:00:00 2001
From: Prudhvi Godithi
Date: Sun, 23 Nov 2025 21:37:23 -0800
Subject: [PATCH 7/8] tidy

Signed-off-by: Prudhvi Godithi
---
 .../src/java/org/apache/lucene/search/PointRangeQuery.java | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
index 29c0b5ae60e5..9cacbf0bb19c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -364,10 +364,9 @@ public ScorerSupplier scorerSupplier(IndexSearcher.LeafReaderContextPartition pa
     } else {
       // Get or create the cached supplier for this segment
       SegmentDocIdSetSupplier segmentSupplier =
-          segmentCache.computeIfAbsent(
-              partition.ctx,
-              ctx -> new SegmentDocIdSetSupplier(ctx));
-      // Each call creates a new PartitionScorerSupplier and all partitions share the same SegmentDocIdSetSupplier
+          segmentCache.computeIfAbsent(partition.ctx, ctx -> new SegmentDocIdSetSupplier(ctx));
+      // Each call creates a new PartitionScorerSupplier and all partitions share the same
+      // SegmentDocIdSetSupplier
       return new PartitionScorerSupplier(
           segmentSupplier, partition.minDocId, partition.maxDocId, score(), scoreMode);
     }

From bf63818287ae64a95d3d31ead50974aa6568b2ed Mon Sep 17 00:00:00 2001
From: Prudhvi Godithi
Date: Mon, 24 Nov 2025 09:59:38 -0800
Subject: [PATCH 8/8] Update comments

Signed-off-by: Prudhvi Godithi
---
 .../src/java/org/apache/lucene/search/PointRangeQuery.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
index 9cacbf0bb19c..0e6947054928 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -404,6 +404,8 @@ public Scorer get(long leadCost) throws IOException {
     private DocIdSetIterator getIterator() throws IOException {
       // Get the shared DocIdSet (built once per segment)
+      // The underlying FixedBitSet/int[] buffer is shared across all partitions,
+      // but each partition gets its own iterator with its own position state.
       DocIdSet docIdSet = segmentSupplier.getOrBuild();
       DocIdSetIterator fullIterator = docIdSet.iterator();
       if (fullIterator == null) {
@@ -447,9 +449,8 @@ public BulkScorer bulkScorer() throws IOException {
     }

     /**
-     * Iterator that filters another iterator to only return docs within a partition range.
-     * Reading from a FixedBitSet is thread-safe (just reading from long[]), so multiple
-     * partitions can read from the same underlying DocIdSet concurrently.
+     * Iterator that filters a delegate iterator to only return docs within a partition range.
+     * Used to restrict a full-segment DocIdSetIterator to a specific partition's boundaries.
      */
     static final class PartitionFilteredDocIdSetIterator extends DocIdSetIterator {
       private final DocIdSetIterator delegate;
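The series ends on PartitionFilteredDocIdSetIterator, which clamps a shared full-segment iterator to one partition's [minDocId, maxDocId) window. A self-contained sketch of that clamping over a plain sorted int array; the linear skip below is a simplification standing in for the real iterator's first-call advance to minDocId:

    final class BoundedIntIterator {
      static final int NO_MORE_DOCS = Integer.MAX_VALUE;

      private final int[] docs; // stands in for the delegate DocIdSetIterator
      private final int minDocId;
      private final int maxDocId; // bounds are [minDocId, maxDocId)
      private int pos = -1;

      BoundedIntIterator(int[] sortedDocs, int minDocId, int maxDocId) {
        this.docs = sortedDocs;
        this.minDocId = minDocId;
        this.maxDocId = maxDocId;
      }

      int nextDoc() {
        while (++pos < docs.length) {
          int doc = docs[pos];
          if (doc < minDocId) {
            continue; // skip docs before the partition start
          }
          return doc < maxDocId ? doc : NO_MORE_DOCS; // clamp at the partition end
        }
        return NO_MORE_DOCS;
      }

      public static void main(String[] args) {
        BoundedIntIterator it = new BoundedIntIterator(new int[] {1, 4, 7, 9}, 3, 8);
        for (int d = it.nextDoc(); d != NO_MORE_DOCS; d = it.nextDoc()) {
          System.out.println(d); // prints 4, then 7
        }
      }
    }

Because each partition wraps its own iterator around the shared DocIdSet, cursor state stays per-partition while the underlying bit data is read-only, which is what makes the concurrent sharing built up in the earlier patches safe.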