
Commit e706267

[QueryCache] Use RamUsageEstimator instead of defaulting to 1024 bytes for non-accountable query size (#15124)
* Use RamUsageEstimator to calculate query size instead of the default 1024 bytes for non-accountable queries
* Use try-with-resources for directory and indexWriter
* Adding changes to cache query size and queries per clause to reduce impact of repeated visit() calls during RamUsageEstimator.sizeOf()
* Adding changelog entry
* Making queries per clause list immutable
* Adding unit test to verify query size is cached correctly
* Renaming QueryMetadata to Record
* Changing query metadata to record type and removing boolean query changes
1 parent ec4cd9f commit e706267
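
For context on what the change buys: before this commit, any cached query that did not implement Accountable was charged a flat QUERY_DEFAULT_RAM_BYTES_USED (1024 bytes); with this commit, the query tree is walked once via Query#visit inside RamUsageEstimator.sizeOf(query, 32) and the result is memoized. A minimal standalone sketch of the difference (QuerySizeDemo and its contents are a hypothetical illustration, not part of the commit; the RamUsageEstimator members are the ones used in the diff below):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.RamUsageEstimator;

public class QuerySizeDemo {
  public static void main(String[] args) {
    // A term query whose payload dwarfs the old flat default.
    TermQuery large = new TermQuery(new Term("foo", "a".repeat(5000)));

    // Old accounting: one flat default for every non-Accountable query.
    long oldEstimate = RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED; // 1024

    // New accounting: walk the query tree, assuming a rough 32-byte shallow
    // size per query object (the same constant the diff below uses).
    long newEstimate = RamUsageEstimator.sizeOf(large, 32);

    System.out.println("old=" + oldEstimate + ", new=" + newEstimate);
  }
}

For a query holding a large term, or a deeply nested query tree, the two numbers can differ substantially, which is what made the flat default misleading for cache memory accounting.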

File tree: 3 files changed, +101 -31 lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,8 @@ Improvements
   SIGSEGV with the old unsafe "mmap hack", which has been replaced by MemorySegments.
   (Uwe Schindler, Robert Muir)
 
+* GITHUB#15124: Use RamUsageEstimator to calculate size for non-accountable queries. (Sagar Upadhyaya)
+
 Optimizations
 ---------------------
 * GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)

lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java

Lines changed: 39 additions & 31 deletions
@@ -18,7 +18,6 @@
 
 import static org.apache.lucene.util.RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY;
 import static org.apache.lucene.util.RamUsageEstimator.LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
-import static org.apache.lucene.util.RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -91,7 +90,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
   private final Predicate<LeafReaderContext> leavesToCache;
   // maps queries that are contained in the cache to a singleton so that this
   // cache does not store several copies of the same query
-  private final Map<Query, Query> uniqueQueries;
+  private final Map<Query, QueryMetadata> uniqueQueries;
   // The contract between this set and the per-leaf caches is that per-leaf caches
   // are only allowed to store sub-sets of the queries that are contained in
   // mostRecentlyUsedQueries. This is why write operations are performed under a lock
@@ -176,6 +175,11 @@ public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
     this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000), 10);
   }
 
+  // pkg-private for testing
+  Map<Query, QueryMetadata> getUniqueQueries() {
+    return uniqueQueries;
+  }
+
   // pkg-private for testing
   static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
     private final int minSize;
@@ -300,32 +304,35 @@ CacheAndCount get(Query key, IndexReader.CacheHelper cacheHelper) {
       return null;
     }
     // this get call moves the query to the most-recently-used position
-    final Query singleton = uniqueQueries.get(key);
-    if (singleton == null) {
+    final QueryMetadata record = uniqueQueries.get(key);
+    if (record == null || record.query == null) {
       onMiss(readerKey, key);
       return null;
     }
-    final CacheAndCount cached = leafCache.get(singleton);
+    final CacheAndCount cached = leafCache.get(record.query);
     if (cached == null) {
-      onMiss(readerKey, singleton);
+      onMiss(readerKey, record.query);
     } else {
-      onHit(readerKey, singleton);
+      onHit(readerKey, record.query);
     }
     return cached;
   }
 
-  private void putIfAbsent(Query query, CacheAndCount cached, IndexReader.CacheHelper cacheHelper) {
+  public void putIfAbsent(Query query, CacheAndCount cached, IndexReader.CacheHelper cacheHelper) {
     assert query instanceof BoostQuery == false;
     assert query instanceof ConstantScoreQuery == false;
     // under a lock to make sure that mostRecentlyUsedQueries and cache remain sync'ed
     writeLock.lock();
     try {
-      Query singleton = uniqueQueries.putIfAbsent(query, query);
-      if (singleton == null) {
-        onQueryCache(query, getRamBytesUsed(query));
-      } else {
-        query = singleton;
-      }
+      QueryMetadata record =
+          uniqueQueries.computeIfAbsent(
+              query,
+              q -> {
+                long queryRamBytesUsed = getRamBytesUsed(q);
+                onQueryCache(q, queryRamBytesUsed);
+                return new QueryMetadata(q, queryRamBytesUsed);
+              });
+      query = record.query;
       final IndexReader.CacheKey key = cacheHelper.getKey();
       LeafCache leafCache = cache.get(key);
       if (leafCache == null) {
@@ -347,25 +354,24 @@ private void evictIfNecessary() {
     assert writeLock.isHeldByCurrentThread();
     // under a lock to make sure that mostRecentlyUsedQueries and cache keep sync'ed
     if (requiresEviction()) {
-
-      Iterator<Query> iterator = mostRecentlyUsedQueries.iterator();
+      Iterator<Map.Entry<Query, QueryMetadata>> iterator = uniqueQueries.entrySet().iterator();
       do {
-        final Query query = iterator.next();
-        final int size = mostRecentlyUsedQueries.size();
+        final Map.Entry<Query, QueryMetadata> entry = iterator.next();
+        final int size = uniqueQueries.size();
         iterator.remove();
-        if (size == mostRecentlyUsedQueries.size()) {
+        if (size == uniqueQueries.size()) {
           // size did not decrease, because the hash of the query changed since it has been
           // put into the cache
           throw new ConcurrentModificationException(
               "Removal from the cache failed! This "
                   + "is probably due to a query which has been modified after having been put into "
                   + " the cache or a badly implemented clone(). Query class: ["
-                  + query.getClass()
+                  + entry.getKey().getClass()
                   + "], query: ["
-                  + query
+                  + entry.getKey()
                   + "]");
         }
-        onEviction(query);
+        onEviction(entry.getKey(), entry.getValue().queryRamBytesUsed);
       } while (iterator.hasNext() && requiresEviction());
     }
   }
@@ -394,18 +400,18 @@ public void clearCoreCacheKey(Object coreKey) {
   public void clearQuery(Query query) {
     writeLock.lock();
     try {
-      final Query singleton = uniqueQueries.remove(query);
-      if (singleton != null) {
-        onEviction(singleton);
+      final QueryMetadata record = uniqueQueries.remove(query);
+      if (record != null && record.query != null) {
+        onEviction(record.query, record.queryRamBytesUsed);
       }
     } finally {
       writeLock.unlock();
     }
   }
 
-  private void onEviction(Query singleton) {
+  private void onEviction(Query singleton, long querySizeInBytes) {
     assert writeLock.isHeldByCurrentThread();
-    onQueryEviction(singleton, getRamBytesUsed(singleton));
+    onQueryEviction(singleton, querySizeInBytes);
     for (LeafCache leafCache : cache.values()) {
       leafCache.remove(singleton);
     }
@@ -426,10 +432,9 @@ public void clear() {
   }
 
   private static long getRamBytesUsed(Query query) {
-    return LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY
-        + (query instanceof Accountable accountableQuery
-            ? accountableQuery.ramBytesUsed()
-            : QUERY_DEFAULT_RAM_BYTES_USED);
+    // Here 32 represents a rough shallow size for a query object
+    long queryRamBytesUsed = RamUsageEstimator.sizeOf(query, 32);
+    return LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + queryRamBytesUsed;
   }
 
   // pkg-private for testing
@@ -734,6 +739,9 @@ public long ramBytesUsed() {
     }
   }
 
+  // pkg-private for testing
+  record QueryMetadata(Query query, long queryRamBytesUsed) {}
+
   private class CachingWrapperWeight extends ConstantScoreWeight {
 
     private final Weight in;
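
The heart of the change is memoization: getRamBytesUsed(q) now runs exactly once per query, inside computeIfAbsent, and the estimate travels with the query in the QueryMetadata record so that eviction reuses it instead of re-walking the query tree through visit(). A minimal self-contained sketch of that pattern, with illustrative names (SizeMemoizingCache, Entry, and estimateSize are not Lucene API):

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.ToLongFunction;

// Illustrative only: an access-ordered map whose values memoize an expensive
// per-key size estimate, mirroring LRUQueryCache's QueryMetadata record.
class SizeMemoizingCache<K> {
  record Entry<K>(K key, long sizeBytes) {}

  // accessOrder=true keeps least-recently-used entries first, like uniqueQueries
  private final Map<K, Entry<K>> entries = new LinkedHashMap<>(16, 0.75f, true);
  private long totalBytes;

  void put(K key, ToLongFunction<K> estimateSize) {
    entries.computeIfAbsent(
        key,
        k -> {
          long size = estimateSize.applyAsLong(k); // expensive walk happens once
          totalBytes += size;
          return new Entry<>(k, size);
        });
  }

  void evictOldest() {
    Iterator<Map.Entry<K, Entry<K>>> it = entries.entrySet().iterator();
    if (it.hasNext()) {
      Entry<K> evicted = it.next().getValue();
      it.remove();
      totalBytes -= evicted.sizeBytes(); // reuse the memoized size, no re-walk
    }
  }

  long totalBytes() {
    return totalBytes;
  }
}

This also explains why eviction now iterates uniqueQueries.entrySet() directly: uniqueQueries is an access-ordered LinkedHashMap (mostRecentlyUsedQueries is its key set), so the entry iterator yields queries in LRU order together with their memoized sizes.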

lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java

Lines changed: 60 additions & 0 deletions
@@ -74,6 +74,7 @@
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.SuppressForbidden;
 
 public class TestLRUQueryCache extends LuceneTestCase {
@@ -1062,6 +1063,65 @@ public void testBooleanQueryCachesSubClauses() throws IOException {
     dir.close();
   }
 
+  public void testQuerySizeBytesAreCached() throws IOException {
+    try (Directory dir = newDirectory();
+        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+      Document doc = new Document();
+      doc.add(new StringField("foo", "bar", Store.YES));
+      doc.add(new StringField("foo", "quux", Store.YES));
+      w.addDocument(doc);
+      w.commit();
+      final IndexReader reader = w.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      final LRUQueryCache queryCache =
+          new LRUQueryCache(1000000, 10000000, _ -> true, Float.POSITIVE_INFINITY);
+      searcher.setQueryCache(queryCache);
+      searcher.setQueryCachingPolicy(ALWAYS_CACHE);
+      StringBuilder sb = new StringBuilder();
+      sb.append("a".repeat(100));
+      String term = sb.toString();
+      TermQuery termQuery = new TermQuery(new Term("foo", term));
+      long expectedQueryInBytes =
+          LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + RamUsageEstimator.sizeOf(termQuery, 32);
+      searcher.search(new ConstantScoreQuery(termQuery), 1);
+
+      assertEquals(1, queryCache.getUniqueQueries().size());
+      assertEquals(
+          expectedQueryInBytes, queryCache.getUniqueQueries().get(termQuery).queryRamBytesUsed());
+      reader.close();
+    }
+  }
+
+  public void testCacheRamBytesWithALargeTermQuery() throws IOException {
+    try (Directory dir = newDirectory();
+        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+      Document doc = new Document();
+      doc.add(new StringField("foo", "bar", Store.YES));
+      doc.add(new StringField("foo", "quux", Store.YES));
+      w.addDocument(doc);
+      w.commit();
+      final IndexReader reader = w.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      final LRUQueryCache queryCache =
+          new LRUQueryCache(1000000, 10000000, _ -> true, Float.POSITIVE_INFINITY);
+      searcher.setQueryCache(queryCache);
+      searcher.setQueryCachingPolicy(ALWAYS_CACHE);
+      StringBuilder sb = new StringBuilder();
+      // Create a large string for the field value so it certainly exceeds the default query size we
+      // use ie 1024 bytes.
+      sb.append("a".repeat(1200));
+      String longTerm = sb.toString();
+      TermQuery must = new TermQuery(new Term("foo", longTerm));
+      long queryInBytes = RamUsageEstimator.sizeOf(must, 32);
+      assertTrue(queryInBytes > QUERY_DEFAULT_RAM_BYTES_USED);
+      searcher.search(new ConstantScoreQuery(must), 1);
+
+      assertEquals(1, queryCache.cachedQueries().size());
+      assertTrue(queryCache.ramBytesUsed() >= queryInBytes);
+      reader.close();
+    }
+  }
+
   private static Term randomTerm() {
     final String term = RandomPicks.randomFrom(random(), Arrays.asList("foo", "bar", "baz"));
     return new Term("foo", term);
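
To run the new tests locally, something along these lines should work, assuming Lucene's standard Gradle setup (exact task paths can vary by branch):

./gradlew -p lucene/core test --tests TestLRUQueryCache

Gradle's --tests filter also accepts a single method, e.g. --tests "TestLRUQueryCache.testQuerySizeBytesAreCached".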
