Skip to content

Commit b638d5f

Browse files
author
Saurabh Singh
committed
Support for traversing BKD tree with prefetching
1 parent 8e8e37d commit b638d5f

File tree

3 files changed

+214
-2
lines changed

3 files changed

+214
-2
lines changed

lucene/core/src/java/org/apache/lucene/index/PointValues.java

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import java.io.UncheckedIOException;
2121
import java.math.BigInteger;
2222
import java.net.InetAddress;
23+
import java.util.List;
2324
import org.apache.lucene.document.BinaryPoint;
2425
import org.apache.lucene.document.DoublePoint;
2526
import org.apache.lucene.document.Field;
@@ -276,6 +277,15 @@ public interface PointTree extends Cloneable {
276277
void visitDocValues(IntersectVisitor visitor) throws IOException;
277278
}
278279

280+
public interface PrefetchablePointTree extends PointTree {
281+
282+
/** Visit all the docs below the node at position pos */
283+
void visitDocIDs(long pos, IntersectVisitor visitor) throws IOException;
284+
285+
/** call prefetch for docs below the current node */
286+
void prepareVisitDocIDs(PrefetchCapableVisitor prefetchCapableVisitor) throws IOException;
287+
}
288+
279289
/**
280290
* We recurse the {@link PointTree}, using a provided instance of this to guide the recursion.
281291
*
@@ -341,6 +351,65 @@ default void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOExcep
341351
default void grow(int count) {}
342352
}
343353

354+
public interface PrefetchCapableVisitor extends IntersectVisitor {
355+
356+
/**
357+
* return the last matched block ordinal - this is used to avoid prefetching call for contiguous
358+
* ordinals assuming contiguous ordinals prefetching can be taken care by readaheads.
359+
*/
360+
int lastMatchedBlock();
361+
362+
/** set last matched block ordinal * */
363+
void setLastMatchedBlock(int leafNodeOrdinal);
364+
365+
/** save prefetched block for visting later on * */
366+
void savePrefetchedBlockForLaterVisit(long leafFp);
367+
368+
/** returns the saved prefetch blocks * */
369+
List<Long> savedPrefetchedBlocks();
370+
}
371+
372+
public final void intersectWithPrefetch(PrefetchCapableVisitor visitor) throws IOException {
373+
final PointTree pointTree = getPointTree();
374+
assert pointTree instanceof PrefetchablePointTree;
375+
PrefetchablePointTree prefetchablePointTree = (PrefetchablePointTree) pointTree;
376+
intersectWithPrefetch(visitor, prefetchablePointTree);
377+
List<Long> fps = visitor.savedPrefetchedBlocks();
378+
for (int fp = 0; fp < fps.size(); ++fp) {
379+
prefetchablePointTree.visitDocIDs(fps.get(fp), visitor);
380+
}
381+
382+
assert prefetchablePointTree.moveToParent() == false;
383+
}
384+
385+
private static void intersectWithPrefetch(
386+
PrefetchCapableVisitor visitor, PrefetchablePointTree pointTree) throws IOException {
387+
while (true) {
388+
Relation compare =
389+
visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
390+
if (compare == Relation.CELL_INSIDE_QUERY) {
391+
// This cell is fully inside the query shape: recursively add all points in this cell
392+
// without filtering
393+
pointTree.prepareVisitDocIDs(visitor);
394+
} else if (compare == Relation.CELL_CROSSES_QUERY) {
395+
// The cell crosses the shape boundary, or the cell fully contains the query, so we fall
396+
// through and do full filtering:
397+
if (pointTree.moveToChild()) {
398+
continue;
399+
}
400+
// TODO: we can assert that the first value here in fact matches what the pointTree
401+
// claimed?
402+
// Leaf node; scan and filter all points in this block:
403+
pointTree.visitDocValues(visitor);
404+
}
405+
while (pointTree.moveToSibling() == false) {
406+
if (pointTree.moveToParent() == false) {
407+
return;
408+
}
409+
}
410+
}
411+
}
412+
344413
/**
345414
* Finds all documents and points matching the provided visitor. This method does not enforce live
346415
* documents, so it's up to the caller to test whether each document is deleted, if necessary.

lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ public PointTree getPointTree() throws IOException {
195195
isTreeBalanced);
196196
}
197197

198-
private static class BKDPointTree implements PointTree {
198+
private static class BKDPointTree implements PrefetchablePointTree {
199199
private int nodeID;
200200
// during clone, the node root can be different to 1
201201
private final int nodeRoot;
@@ -589,6 +589,54 @@ public void visitDocIDs(PointValues.IntersectVisitor visitor) throws IOException
589589
addAll(visitor, false);
590590
}
591591

592+
@Override
593+
public void prepareVisitDocIDs(PrefetchCapableVisitor prefetchCapableVisitor)
594+
throws IOException {
595+
resetNodeDataPosition();
596+
prefetchAll(prefetchCapableVisitor);
597+
}
598+
599+
@Override
600+
public void visitDocIDs(long position, IntersectVisitor visitor) throws IOException {
601+
leafNodes.seek(position);
602+
int count = leafNodes.readVInt();
603+
if (count <= Integer.MAX_VALUE) {
604+
visitor.grow(count);
605+
}
606+
docIdsWriter.readInts(leafNodes, count, visitor, scratchIterator.docIDs);
607+
}
608+
609+
private int getLeafNodeOrdinal() {
610+
assert isLeafNode() : "nodeID=" + nodeID + " is not a leaf";
611+
return nodeID - leafNodeOffset;
612+
}
613+
614+
public void prefetchAll(PrefetchCapableVisitor prefetchCapableVisitor) throws IOException {
615+
if (isLeafNode()) {
616+
// int count = isLastLeaf() ? config.maxPointsInLeafNode() : lastLeafNodePointCount;
617+
long leafFp = getLeafBlockFP();
618+
int leafNodeOrdinal = getLeafNodeOrdinal();
619+
// Only call prefetch is this is the first leaf node ordinal or the first match in
620+
// contigiuous sequence of matches for leaf nodes
621+
// boolean prefetched = false;
622+
if (prefetchCapableVisitor.lastMatchedBlock() == -1
623+
|| prefetchCapableVisitor.lastMatchedBlock() + 1 < leafNodeOrdinal) {
624+
// System.out.println("Prefetched called on " + leafNodeOrdinal);
625+
leafNodes.prefetch(leafFp, 1);
626+
// prefetched = true;
627+
}
628+
prefetchCapableVisitor.setLastMatchedBlock(leafNodeOrdinal);
629+
prefetchCapableVisitor.savePrefetchedBlockForLaterVisit(leafFp);
630+
} else {
631+
pushLeft();
632+
prefetchAll(prefetchCapableVisitor);
633+
pop();
634+
pushRight();
635+
prefetchAll(prefetchCapableVisitor);
636+
pop();
637+
}
638+
}
639+
592640
public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws IOException {
593641
if (grown == false) {
594642
final long size = size();

lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1485,7 +1485,11 @@ private void assertStats(int maxDoc) {
14851485
@Override
14861486
public PointTree getPointTree() throws IOException {
14871487
assertThread("Points", creationThread);
1488-
return new AssertingPointTree(in, in.getPointTree());
1488+
if (in instanceof PrefetchablePointTree) {
1489+
return new AssertingPrefetchablePointTree(in, (PrefetchablePointTree) in.getPointTree());
1490+
} else {
1491+
return new AssertingPointTree(in, in.getPointTree());
1492+
}
14891493
}
14901494

14911495
@Override
@@ -1599,6 +1603,97 @@ public void visitDocValues(IntersectVisitor visitor) throws IOException {
15991603
}
16001604
}
16011605

1606+
static class AssertingPrefetchablePointTree implements PointValues.PrefetchablePointTree {
1607+
1608+
final PointValues pointValues;
1609+
final PointValues.PrefetchablePointTree in;
1610+
1611+
AssertingPrefetchablePointTree(PointValues pointValues, PointValues.PrefetchablePointTree in) {
1612+
this.pointValues = pointValues;
1613+
this.in = in;
1614+
}
1615+
1616+
@Override
1617+
public PointValues.PointTree clone() {
1618+
return new AssertingPrefetchablePointTree(
1619+
pointValues, (PointValues.PrefetchablePointTree) in.clone());
1620+
}
1621+
1622+
@Override
1623+
public boolean moveToChild() throws IOException {
1624+
return in.moveToChild();
1625+
}
1626+
1627+
@Override
1628+
public boolean moveToSibling() throws IOException {
1629+
return in.moveToSibling();
1630+
}
1631+
1632+
@Override
1633+
public boolean moveToParent() throws IOException {
1634+
return in.moveToParent();
1635+
}
1636+
1637+
@Override
1638+
public byte[] getMinPackedValue() {
1639+
return in.getMinPackedValue();
1640+
}
1641+
1642+
@Override
1643+
public byte[] getMaxPackedValue() {
1644+
return in.getMaxPackedValue();
1645+
}
1646+
1647+
@Override
1648+
public long size() {
1649+
final long size = in.size();
1650+
assert size > 0;
1651+
return size;
1652+
}
1653+
1654+
@Override
1655+
public void visitDocIDs(IntersectVisitor visitor) throws IOException {
1656+
in.visitDocIDs(
1657+
new AssertingIntersectVisitor(
1658+
pointValues.getNumDimensions(),
1659+
pointValues.getNumIndexDimensions(),
1660+
pointValues.getBytesPerDimension(),
1661+
visitor));
1662+
}
1663+
1664+
@Override
1665+
public void visitDocValues(IntersectVisitor visitor) throws IOException {
1666+
in.visitDocValues(
1667+
new AssertingIntersectVisitor(
1668+
pointValues.getNumDimensions(),
1669+
pointValues.getNumIndexDimensions(),
1670+
pointValues.getBytesPerDimension(),
1671+
visitor));
1672+
}
1673+
1674+
/**
1675+
* Visit all the docs below the node at position pos
1676+
*
1677+
* @param pos position of block from where to start reading doc ids
1678+
* @param visitor visitor that will visit doc ids.
1679+
*/
1680+
@Override
1681+
public void visitDocIDs(long pos, IntersectVisitor visitor) throws IOException {
1682+
in.visitDocIDs(pos, visitor);
1683+
}
1684+
1685+
/**
1686+
* call prefetch for docs below the current node
1687+
*
1688+
* @param prefetchCapableVisitor prefetch capable visitors
1689+
*/
1690+
@Override
1691+
public void prepareVisitDocIDs(PointValues.PrefetchCapableVisitor prefetchCapableVisitor)
1692+
throws IOException {
1693+
in.prepareVisitDocIDs(prefetchCapableVisitor);
1694+
}
1695+
}
1696+
16021697
/**
16031698
* Validates in the 1D case that all points are visited in order, and point values are in bounds
16041699
* of the last cell checked

0 commit comments

Comments
 (0)