From 25b6e4dac7c7839066e3b996bf602b46cc033e04 Mon Sep 17 00:00:00 2001
From: Ankit Jain <jainankitk@apache.org>
Date: Wed, 29 Oct 2025 12:58:20 -0700
Subject: [PATCH 1/3] Initial implementation of bulk collection from bulk
 scorer

Signed-off-by: Ankit Jain <jainankitk@apache.org>
---
 .../apache/lucene/search/LeafCollector.java   | 29 ++++++++++
 .../lucene/search/TopScoreDocCollector.java   |  5 ++
 .../java/org/apache/lucene/search/Weight.java | 54 ++++++++++++++-----
 3 files changed, 76 insertions(+), 12 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
index f8765c18372d..b019784b3a11 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
@@ -123,6 +123,35 @@ default void collect(DocIdStream stream) throws IOException {
     stream.forEach(this::collect);
   }
 
+  /**
+   * Bulk-collect doc IDs.
+   *
+   * <p>Note: The provided int[] may be reused across calls and should be consumed
+   * immediately.
+   *
+   * <p>Note: The provided int[] typically only holds a small subset of query matches.
+   * This method may be called multiple times per segment.
+   *
+   * <p>Like {@link #collect(int)}, it is guaranteed that doc IDs get collected in order, ie. doc
+   * IDs are collected in order within a int[], and if called twice, all doc IDs from
+   * the second int[] will be greater than all doc IDs from the first int[].
+   *
+   * <p>It is legal for callers to mix calls to {@link #collect(int[])}, {@link #collect(DocIdStream)}
+   * and {@link #collect(int)}.
+   *
+   * <p>The default implementation calls
+   * {@code
+   * for(int i = 0; i < count; ++i) {
+   *  collect(docs[i]);
+   * };
+   * }.
+   */
+  default void collect(int[] docs, int count) throws IOException {
+    for(int i = 0; i < count; ++i) {
+      collect(docs[i]);
+    };
+  }
+
   /**
    * Optionally returns an iterator over competitive documents.
    *
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index e878f6f880b8..09a7146f8246 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -127,6 +127,11 @@ public void collect(int doc) throws IOException {
         }
       }
 
+      @Override
+      public void collect(int[] docs, int count) {
+        collect(docs, count);
+      }
+
       private void collectCompetitiveHit(int doc, float score) throws IOException {
         final long code = DocScoreEncoder.encode(doc + docBase, score);
         topCode = heap.updateTop(code);
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 341dd3cadf6a..04e019d791b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -275,29 +275,35 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
       // collect() because only a subset of collectors produce a competitive iterator, and the set
       // of implementing classes for two-phase approximations is smaller than the set of doc id set
       // iterator implementations.
+
+      // TODO: decide whether each score* helper should allocate its own buffer instead.
+      int[] docBuffer = new int[64];
       if (twoPhase == null && competitiveIterator == null) {
         // Optimize simple iterators with collectors that can't skip
-        scoreIterator(collector, acceptDocs, iterator, max);
+        scoreIterator(collector, acceptDocs, iterator, max, docBuffer);
       } else if (competitiveIterator == null) {
-        scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
+        scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max, docBuffer);
       } else if (twoPhase == null) {
-        scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
+        scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max, docBuffer);
       } else {
         scoreTwoPhaseOrCompetitiveIterator(
-            collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
+            collector, acceptDocs, iterator, twoPhase, competitiveIterator, max, docBuffer);
       }
 
       return iterator.docID();
     }
 
     private static void scoreIterator(
-        LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
+        LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max, int[] docs)
         throws IOException {
+      int count = 0;
       for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
         if (acceptDocs == null || acceptDocs.get(doc)) {
-          collector.collect(doc);
+          count = collect(collector, docs, count, doc);
         }
       }
+
+      collect(collector, docs, count, -1);
     }
 
     private static void scoreTwoPhaseIterator(
@@ -305,13 +311,17 @@ private static void scoreTwoPhaseIterator(
         Bits acceptDocs,
         DocIdSetIterator iterator,
         TwoPhaseIterator twoPhase,
-        int max)
+        int max,
+        int[] docs)
         throws IOException {
+      int count = 0;
       for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
         if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
-          collector.collect(doc);
+          count = collect(collector, docs, count, doc);
         }
       }
+
+      collect(collector, docs, count, -1);
     }
 
     private static void scoreCompetitiveIterator(
@@ -319,8 +329,10 @@ private static void scoreCompetitiveIterator(
         Bits acceptDocs,
         DocIdSetIterator iterator,
         DocIdSetIterator competitiveIterator,
-        int max)
+        int max,
+        int[] docs)
         throws IOException {
+      int count = 0;
       for (int doc = iterator.docID(); doc < max; ) {
         assert competitiveIterator.docID() <= doc; // invariant
         if (competitiveIterator.docID() < doc) {
@@ -332,11 +344,13 @@ private static void scoreCompetitiveIterator(
         }
 
         if ((acceptDocs == null || acceptDocs.get(doc))) {
-          collector.collect(doc);
+          count = collect(collector, docs, count, doc);
         }
 
         doc = iterator.nextDoc();
       }
+
+      collect(collector, docs, count, -1);
     }
 
     private static void scoreTwoPhaseOrCompetitiveIterator(
@@ -345,8 +359,10 @@ private static void scoreTwoPhaseOrCompetitiveIterator(
         DocIdSetIterator iterator,
         TwoPhaseIterator twoPhase,
         DocIdSetIterator competitiveIterator,
-        int max)
+        int max,
+        int[] docs)
         throws IOException {
+      int count = 0;
       for (int doc = iterator.docID(); doc < max; ) {
         assert competitiveIterator.docID() <= doc; // invariant
         if (competitiveIterator.docID() < doc) {
@@ -358,11 +374,25 @@ private static void scoreTwoPhaseOrCompetitiveIterator(
         }
 
         if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
-          collector.collect(doc);
+          count = collect(collector, docs, count, doc);
         }
 
         doc = iterator.nextDoc();
       }
+
+      collect(collector, docs, count, -1);
+    }
+
+    private static int collect(LeafCollector collector, int[] docs, int count, int docId) throws IOException {
+      if (count == docs.length || docId == -1) {
+        collector.collect(docs, count);
+        count = 0;
+      }
+
+      // A full buffer was flushed above, so count < docs.length; a -1 flush marker is stored too.
+      docs[count++] = docId;
+
+      return count;
     }
   }
 }

From feed60b71f9cc59e72bd0050ac740e5a69b631a2 Mon Sep 17 00:00:00 2001
From: Ankit Jain <jainankitk@apache.org>
Date: Wed, 29 Oct 2025 13:00:39 -0700
Subject: [PATCH 2/3] tidy

Signed-off-by: Ankit Jain <jainankitk@apache.org>
---
 .../apache/lucene/search/LeafCollector.java   | 32 ++++++++-----------
 .../java/org/apache/lucene/search/Weight.java |  6 ++--
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
index b019784b3a11..8d772cbcd6d5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
@@ -126,30 +126,26 @@ default void collect(DocIdStream stream) throws IOException {
   /**
    * Bulk-collect doc IDs.
    *
-   * <p>Note: The provided int[] may be reused across calls and should be consumed
-   * immediately.
+   * <p>Note: The provided int[] may be reused across calls and should be consumed immediately.
    *
-   * <p>Note: The provided int[] typically only holds a small subset of query matches.
-   * This method may be called multiple times per segment.
+   * <p>Note: The provided int[] typically only holds a small subset of query matches. This method
+   * may be called multiple times per segment.
    *
    * <p>Like {@link #collect(int)}, it is guaranteed that doc IDs get collected in order, ie. doc
-   * IDs are collected in order within a int[], and if called twice, all doc IDs from
-   * the second int[] will be greater than all doc IDs from the first int[].
-   *
-   * <p>It is legal for callers to mix calls to {@link #collect(int[])}, {@link #collect(DocIdStream)}
-   * and {@link #collect(int)}.
-   *
-   * <p>The default implementation calls
-   * {@code
-   * for(int i = 0; i < count; ++i) {
-   *  collect(docs[i]);
-   * };
-   * }.
+   * IDs are collected in order within an int[], and if called twice, all doc IDs from the second
+   * int[] will be greater than all doc IDs from the first int[].
+   *
+   * <p>It is legal for callers to mix calls to {@link #collect(int[], int)}, {@link
+   * #collect(DocIdStream)} and {@link #collect(int)}.
+   *
+   * <p>The default implementation calls {@code for(int i = 0; i < count; ++i) { collect(docs[i]);
+   * } }.
    */
   default void collect(int[] docs, int count) throws IOException {
-    for(int i = 0; i < count; ++i) {
+    for (int i = 0; i < count; ++i) {
       collect(docs[i]);
-    };
+    }
+    ;
   }
 
   /**
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 04e019d791b4..e10155e2df01 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -284,7 +284,8 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
       } else if (competitiveIterator == null) {
         scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max, docBuffer);
       } else if (twoPhase == null) {
-        scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max, docBuffer);
+        scoreCompetitiveIterator(
+            collector, acceptDocs, iterator, competitiveIterator, max, docBuffer);
       } else {
         scoreTwoPhaseOrCompetitiveIterator(
             collector, acceptDocs, iterator, twoPhase, competitiveIterator, max, docBuffer);
@@ -383,7 +384,8 @@ private static void scoreTwoPhaseOrCompetitiveIterator(
       collect(collector, docs, count, -1);
     }
 
-    private static int collect(LeafCollector collector, int[] docs, int count, int docId) throws IOException {
+    private static int collect(LeafCollector collector, int[] docs, int count, int docId)
+        throws IOException {
       if (count == docs.length || docId == -1) {
         collector.collect(docs, count);
         count = 0;

From 82fe4e63f93c79e7c68892fe59ffa2dccd031459 Mon Sep 17 00:00:00 2001
From: Ankit Jain <jainankitk@apache.org>
Date: Wed, 29 Oct 2025 18:05:15 -0700
Subject: [PATCH 3/3] Minor fix

Signed-off-by: Ankit Jain <jainankitk@apache.org>
---
 .../src/java/org/apache/lucene/search/LeafCollector.java    | 1 -
 .../java/org/apache/lucene/search/TopScoreDocCollector.java | 6 ++++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
index 8d772cbcd6d5..ac54455559e5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
@@ -145,7 +145,6 @@ default void collect(int[] docs, int count) throws IOException {
     for (int i = 0; i < count; ++i) {
       collect(docs[i]);
     }
-    ;
   }
 
   /**
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index 09a7146f8246..90fbc4472373 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -128,8 +128,10 @@ public void collect(int doc) throws IOException {
       }
 
       @Override
-      public void collect(int[] docs, int count) {
-        collect(docs, count);
+      public void collect(int[] docs, int count) throws IOException {
+        for (int i = 0; i < count; ++i) {
+          collect(docs[i]);
+        }
       }
 
       private void collectCompetitiveHit(int doc, float score) throws IOException {