From 093a8f809d71a32a76d28ee8e501af861d046cb2 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 31 May 2025 12:05:44 -0400 Subject: [PATCH 01/18] GH-5343 speed-up size calculation in LmdbStore --- .../sail/base/DelegatingSailDataset.java | 5 + .../eclipse/rdf4j/sail/base/SailDataset.java | 5 + .../rdf4j/sail/base/SailDatasetImpl.java | 13 +- .../rdf4j/sail/base/SailSourceConnection.java | 18 +++ .../rdf4j/sail/lmdb/LmdbSailStore.java | 72 +++++++++ .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 6 +- .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 90 ++++++++++- .../rdf4j/sail/lmdb/CardinalityExactTest.java | 149 ++++++++++++++++++ .../rdf4j/sail/lmdb/LmdbSailStoreTest.java | 98 +++++++++++- .../sail/lmdb/LmdbStoreConnectionTest.java | 35 ++++ 10 files changed, 484 insertions(+), 7 deletions(-) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java index a426eb395a4..ad9b963872c 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java @@ -93,4 +93,9 @@ public Set getSupportedOrders(Resource subj, IRI pred, Value obj public Comparator getComparator() { return delegate.getComparator(); } + + @Override + public long size(Resource subj, IRI pred, Value obj, Resource... 
contexts) { + return delegate.size(subj, pred, obj, contexts); + } } diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java index b47b9410ae9..632071e28f5 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java @@ -130,4 +130,9 @@ default Comparator getComparator() { return null; } + @Experimental + default long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) { + return getStatements(subj, pred, obj, contexts).stream() + .count(); + } } diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java index b90a9008657..dd616872c41 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java @@ -115,7 +115,7 @@ public CloseableIteration getNamespaces() throws SailExcept if (added == null && removed == null) { return namespaces; } - final Iterator> addedIter = added; + final Iterator> addedIter = added; final Set removedSet = removed; return new AbstractCloseableIteration<>() { @@ -383,4 +383,15 @@ private boolean isDeprecated(Triple triple, List deprecatedStatements } return true; } + + @Override + public long size(final Resource subj, final IRI pred, final Value obj, final Resource... 
contexts) { + // Fast path: no approved or deprecated + if (!changes.hasApproved() && !changes.hasDeprecated()) { + return derivedFrom.size(subj, pred, obj, contexts); + } + + // Fallback path: iterate over all matching statements + return getStatements(subj, pred, obj, contexts).stream().count(); + } } diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java index 7942984593a..7506af75cbc 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java @@ -17,6 +17,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Stream; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.order.StatementOrder; import org.eclipse.rdf4j.common.transaction.IsolationLevel; @@ -1033,4 +1034,21 @@ private boolean hasStatement(SailDataset dataset, Resource subj, IRI pred, Value } } + /** + * Returns the number of statements in the snapshot, optionally including inferred statements, for the given + * contexts. This method reads the size directly from the dataset within the current isolation level. + * + * @param includeInferred whether to include inferred statements in the count + * @param contexts the RDF contexts (named graphs) to restrict the count to; if none are provided, counts all + * contexts + * @return the number of statements in the dataset + * @throws SailException if an error occurs while accessing the Sail store + */ + @Experimental + protected long getSizeFromSnapshot(final boolean includeInferred, final Resource... 
contexts) throws SailException { + try (SailSource branch = branch(IncludeInferred.fromBoolean(includeInferred))) { + return branch.dataset(getIsolationLevel()).size(null, null, null, contexts); + } + } + } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 02e7d71bf5d..82f628face5 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -403,6 +403,68 @@ CloseableIteration createStatementIterator( } } + /** + * Returns the number of statements that match the specified pattern. + * + * @param subj The subject of the pattern, or null to indicate a wildcard. + * @param pred The predicate of the pattern, or null to indicate a wildcard. + * @param obj The object of the pattern, or null to indicate a wildcard. + * @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is optional. If + * no contexts are supplied the method operates on the entire repository. + * @return The number of statements that match the specified pattern. + * @throws SailException If an error occurred while determining the size. + */ + private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, final Value obj, + final Resource... 
contexts) + throws SailException { + try { + long totalSize = 0; + + long subjID = LmdbValue.UNKNOWN_ID; + if (subj != null) { + subjID = valueStore.getId(subj); + if (subjID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + long predID = LmdbValue.UNKNOWN_ID; + if (pred != null) { + predID = valueStore.getId(pred); + if (predID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + long objID = LmdbValue.UNKNOWN_ID; + if (obj != null) { + objID = valueStore.getId(obj); + if (objID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + // Handle the case where no contexts are specified (query all contexts) + if (contexts.length == 0) { + totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID, false); + } else { + for (Resource context : contexts) { + long contextID = LmdbValue.UNKNOWN_ID; + if (context != null) { + contextID = valueStore.getId(context); + if (contextID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextID, false); + } + } + return totalSize; + } catch (final IOException e) { + throw new SailException(e); + } + } + private final class LmdbSailSource extends BackingSailSource { private final boolean explicit; @@ -955,5 +1017,15 @@ public Set getSupportedOrders(Resource subj, IRI pred, Value obj public Comparator getComparator() { return null; } + + @Override + public long size(final Resource subj, final IRI pred, final Value obj, final Resource... 
contexts) + throws SailException { + try { + return LmdbSailStore.this.size(txn, subj, pred, obj, contexts); + } catch (final Exception e) { + throw new SailException(e); + } + } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 8dc3c7019ed..7a3c1521acf 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -23,7 +23,7 @@ import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.SailReadOnlyException; -import org.eclipse.rdf4j.sail.base.SailSourceConnection; +import org.eclipse.rdf4j.sail.base.*; import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent; import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; @@ -200,4 +200,8 @@ public void clearInferred(Resource... contexts) throws SailException { sailChangedEvent.setStatementsRemoved(true); } + @Override + protected long sizeInternal(Resource... 
contexts) throws SailException { + return super.getSizeFromSnapshot(false, contexts); + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index ef3ab8ebc67..2611f31ac8f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -16,6 +16,7 @@ import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.transaction; import static org.eclipse.rdf4j.sail.lmdb.Varint.readListUnsigned; import static org.eclipse.rdf4j.sail.lmdb.Varint.writeUnsigned; +import static org.lwjgl.system.MemoryStack.stackGet; import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryUtil.NULL; import static org.lwjgl.util.lmdb.LMDB.MDB_CREATE; @@ -85,6 +86,7 @@ import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator; import org.eclipse.rdf4j.sail.lmdb.Varint.GroupMatcher; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack; import org.lwjgl.util.lmdb.MDBEnvInfo; @@ -656,6 +658,66 @@ protected void filterUsedIds(Collection ids) throws IOException { }); } + /** + * Returns the exact total size of the triple pattern with the given subject, predicate, object and context. If the + * subject, predicate, object or context is not specified (i.e., set to {@link LmdbValue#UNKNOWN_ID}), it will + * return the size of the entire database from the mdb_stat. Otherwise, it will iterate over all matching triples + * and count them. 
+ * + * @param subj Subject ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param pred Predicate ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param obj Object ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param context Context ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param includeImplicit Whether to include implicit triples in the count + * @return The exact size of the triple pattern + */ + protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final long pred, final long obj, + final long context, final boolean includeImplicit) + throws IOException { + + if (subj == LmdbValue.UNKNOWN_ID && pred == LmdbValue.UNKNOWN_ID && obj == LmdbValue.UNKNOWN_ID) { + try (final MemoryStack stack = MemoryStack.stackPush()) { + // Fast path: if all values are unknown, return the total size of the database + if (context == LmdbValue.UNKNOWN_ID) { + long cardinality = 0; + final TripleIndex index = getBestIndex(subj, pred, obj, context); + + int dbi = index.getDB(true); + MDBStat stat = MDBStat.mallocStack(stack); + mdb_stat(txn.get(), dbi, stat); + cardinality += stat.ms_entries(); + + if (includeImplicit) { + dbi = index.getDB(false); + mdb_stat(txn.get(), dbi, stat); + cardinality += stat.ms_entries(); + } + return cardinality; + } else { + // Fast path: if only context is specified, return the size of the given context + return getContextSize(txn, stack, context); + } + } + } + + long size = 0; + + try (RecordIterator explicitIter = getTriples(txn, subj, pred, obj, context, true); + RecordIterator implicitIter = includeImplicit + ? 
getTriples(txn, subj, pred, obj, context, false) + : null) { + for (long[] quad = explicitIter.next(); quad != null; quad = explicitIter.next()) { + size++; + } + if (includeImplicit && implicitIter != null) { + for (long[] quad = implicitIter.next(); quad != null; quad = implicitIter.next()) { + size++; + } + } + } + return size; + } + protected double cardinality(long subj, long pred, long obj, long context) throws IOException { TripleIndex index = getBestIndex(subj, pred, obj, context); @@ -673,7 +735,6 @@ protected double cardinality(long subj, long pred, long obj, long context) throw return cardinality; }); } - return txnManager.doWith((stack, txn) -> { final Statistics s = pool.getStatistics(); try { @@ -910,6 +971,33 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean return stAdded; } + private long getContextSize(final Txn txn, final MemoryStack stack, final long context) throws IOException { + try { + stack.push(); + + // Prepare key + MDBVal idVal = MDBVal.calloc(stack); + ByteBuffer keyBuffer = stack.malloc(1 + Long.BYTES); + Varint.writeUnsigned(keyBuffer, context); + keyBuffer.flip(); + idVal.mv_data(keyBuffer); + + // Prepare value holder + MDBVal dataVal = MDBVal.calloc(stack); + int rc = mdb_get(txn.get(), contextsDbi, idVal, dataVal); + if (rc == MDB_SUCCESS && dataVal.mv_data() != null) { + return Varint.readUnsigned(dataVal.mv_data()); + } else if (rc == MDB_NOTFOUND) { + // Context not present in DB + return 0; + } else { + throw new IOException("Failed to read context size: " + mdb_strerror(rc)); + } + } finally { + stack.pop(); + } + } + private void incrementContext(MemoryStack stack, long context) throws IOException { try { stack.push(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java new file mode 100644 index 00000000000..deeda4be1a3 --- /dev/null +++ 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java @@ -0,0 +1,149 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.util.Random; + +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CardinalityExactTest { + private static final int NUM_RESOURCES = 1000; + private static final int MIN_TRIPLES_PER_RESOURCE = 20; + private static final int MAX_TRIPLES_PER_RESOURCE = 100; + private final int[] contextIds = new int[] { 1, 2, 3 }; + private final int[] objectIds = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + @TempDir + File tempFolder; + + protected TripleStore tripleStore; + + @BeforeEach + public void before() throws Exception { + File dataDir = new File(tempFolder, "triplestore"); + dataDir.mkdir(); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc")); + } + + private long countTriples(RecordIterator iterator) { + long count = 0; + while (iterator.next() != null) { + count++; + } + return count; + } + + private long randomObjectId(Random random) { + return objectIds[random.nextInt(objectIds.length)]; + } + + private long randomContextId(Random random) { + return 
contextIds[random.nextInt(contextIds.length)]; + } + + @Test + public void testCardinalityExact() throws Exception { + Random random = new Random(); + + tripleStore.startTransaction(); + + for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) { + int tripleCount = MIN_TRIPLES_PER_RESOURCE + random.nextInt(MAX_TRIPLES_PER_RESOURCE); + for (int i = 0; i < tripleCount; i++) { + long objectId = randomObjectId(random); + long randomContextId = randomContextId(random); + tripleStore.storeTriple(resourceId, 2, objectId, randomContextId, true); + + int predicateId = 2 + random.nextInt(1000) + 1; + tripleStore.storeTriple(resourceId, predicateId, randomObjectId(random), randomContextId, true); + } + } + + tripleStore.commit(); + + try (TxnManager.Txn txn = tripleStore.getTxnManager().createReadTxn()) { + for (final long contextId : contextIds) { + long actual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, + contextId, true); + long expected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, contextId, false)) + + countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, + contextId, true)); + assertEquals(expected, actual, "Exact size does not match counted triples."); + + } + + for (final long objectId : objectIds) { + long explicitActual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + objectId, + LmdbValue.UNKNOWN_ID, false); + long totalActual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + objectId, + LmdbValue.UNKNOWN_ID, true); + long implicitExpected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId, + LmdbValue.UNKNOWN_ID, false)); + long explicitExpected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId, + LmdbValue.UNKNOWN_ID, true)); + + assertEquals(explicitExpected, 
explicitActual); + assertEquals(totalActual, implicitExpected + explicitExpected, + "Exact size does not match counted triples."); + } + + for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) { + long totalExactSize = tripleStore.cardinalityExact(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true); + long expectedCount = countTriples( + tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, false)) + + countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true)); + assertEquals(expectedCount, totalExactSize, "Exact size does not match counted triples."); + } + + for (int resourceId = 1; resourceId <= 50; resourceId++) { + long targetObjectId = randomObjectId(random); + long targetContextId = randomContextId(random); + long generalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + targetObjectId, + targetContextId, false); + long generalExplicitCount = countTriples( + tripleStore.getTriples( + txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId, + true)); + assertEquals( + generalExplicitCount, generalSize, + "Exact size does not match counted triples." 
+ ); + } + + long totalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, true); + long totalCount = countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, false)) + + countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, true)); + assertEquals(totalCount, totalSize, "Total size does not match counted triples."); + } + } + + @AfterEach + public void after() throws Exception { + tripleStore.close(); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index 2e416067a18..e4f81e3cc73 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -17,10 +17,7 @@ import java.io.File; import org.eclipse.rdf4j.common.transaction.IsolationLevels; -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.*; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.query.TupleQuery; @@ -30,9 +27,12 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; /** * Extended test for {@link LmdbStore}. 
@@ -193,6 +193,96 @@ public void testPassConnectionBetweenThreadsWithTx() throws InterruptedException } } + @ParameterizedTest + @EnumSource(IsolationLevels.class) + public void testSizeIsolationLevels(final IsolationLevels isolationLevel) { + try (final RepositoryConnection conn1 = repo.getConnection(); + final RepositoryConnection conn2 = repo.getConnection()) { + final int baseSize = 3; // S0, S1, S2 + Assertions.assertEquals(baseSize, conn1.size(), "Size should be " + baseSize); + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + final int count = 100; + conn1.begin(isolationLevel); + conn2.begin(isolationLevel); + for (int i = 0; i < count; i++) { + conn1.add(F.createStatement(F.createIRI("http://example.org/" + i), RDFS.LABEL, + F.createLiteral("label" + i))); + } + // conn1 should see its own changes + Assertions.assertEquals(baseSize + count, conn1.size(), "Size should be " + (3 + count)); + + // LMDBStore supports: NONE, READ_COMMITTED, SNAPSHOT_READ, SNAPSHOT, and SERIALIZABLE. + // If an unsupported level (e.g., READ_UNCOMMITTED) is requested, + // a stronger supported level (e.g., READ_COMMITTED) is used instead. 
+ if (isolationLevel.equals(IsolationLevels.NONE)) { + // conn2 should see the changes of conn1 + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (3 + count)); + } else if (isolationLevel.equals(IsolationLevels.READ_UNCOMMITTED)) { + // Use a stronger level (READ_COMMITTED) instead of READ_UNCOMMITTED + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + (3 + count)); + } else if (isolationLevel.equals(IsolationLevels.READ_COMMITTED)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SNAPSHOT_READ)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SNAPSHOT)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SERIALIZABLE)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else { + Assertions.fail("Unsupported isolation level: " + isolationLevel); + } + conn1.commit(); + // conn2 should see the changes of conn1 after commit + if (isolationLevel.equals(IsolationLevels.READ_COMMITTED) + || isolationLevel.equals(IsolationLevels.READ_UNCOMMITTED) + || isolationLevel.equals(IsolationLevels.SNAPSHOT_READ)) { + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (3 + count)); + } + conn2.commit(); + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (3 + count)); + } + } + + @ParameterizedTest + @EnumSource(value = IsolationLevels.class, names = "NONE", mode = EnumSource.Mode.EXCLUDE) + public void testSizeWhenRollbackTxn(final IsolationLevels isolationLevel) { + try (RepositoryConnection conn1 = 
repo.getConnection(); + RepositoryConnection conn2 = repo.getConnection()) { + + final int baseSize = 3; // S0, S1, S2 + Assertions.assertEquals(baseSize, conn1.size(), "Initial size in conn1 should be " + baseSize); + Assertions.assertEquals(baseSize, conn2.size(), "Initial size in conn2 should be " + baseSize); + + final int count = 50; + + conn1.begin(isolationLevel); + conn2.begin(isolationLevel); + + for (int i = 0; i < count; i++) { + conn1.add(F.createStatement(F.createIRI("http://example.org/rollback/" + i), RDFS.LABEL, + F.createLiteral("rollback" + i))); + } + + // conn1 sees its uncommitted changes + Assertions.assertEquals(baseSize + count, conn1.size(), "conn1 should see uncommitted additions"); + + // conn2 should NOT see uncommitted changes + Assertions.assertEquals(baseSize, conn2.size(), "conn2 should not see uncommitted changes"); + + conn1.rollback(); + + // After rollback, both connections should see base size + Assertions.assertEquals(baseSize, conn1.size(), "conn1 should not see rolled-back additions"); + Assertions.assertEquals(baseSize, conn2.size(), "conn2 should not see rolled-back additions"); + + conn2.commit(); + } + } + @AfterEach public void after() { repo.shutDown(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java index c6785e1ba60..ea0ffd760b6 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java @@ -13,6 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; +import java.util.Random; import org.eclipse.rdf4j.common.iteration.Iterations; import org.eclipse.rdf4j.common.transaction.IsolationLevel; @@ -22,6 +23,7 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import 
org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.eclipse.rdf4j.testsuite.repository.RepositoryConnectionTest; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -64,4 +66,37 @@ public void testSES715(IsolationLevel level) { testCon2.close(); } + @ParameterizedTest + @MethodSource("parameters") + public void testSize(final IsolationLevel level) { + setupTest(level); + + ValueFactory vf = testCon.getValueFactory(); + IRI context1 = vf.createIRI("http://my.context.1"); + IRI context2 = vf.createIRI("http://my.context.2"); + IRI predicate = vf.createIRI("http://my.predicate"); + IRI object = vf.createIRI("http://my.object"); + Random random = new Random(); + int context1Size = random.nextInt(5000); + int context2Size = random.nextInt(5000); + for (int j = 0; j < context1Size; j++) { + testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context1); + } + for (int j = 0; j < context2Size; j++) { + testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context2); + } + assertEquals(context1Size, testCon.size(context1)); + assertEquals(context2Size, testCon.size(context2)); + assertEquals(context1Size + context2Size, testCon.size()); + + testCon.clear(context1); + assertEquals(0, testCon.size(context1)); + assertEquals(context2Size, testCon.size(context2)); + testCon.commit(); + + assertEquals(0, testCon2.size(context1)); + assertEquals(context2Size, testCon2.size(context2)); + + testCon2.close(); + } } From a1a079b86a5bb7af9b3befe000f6cf92b3da7723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sat, 4 Oct 2025 20:54:08 +0200 Subject: [PATCH 02/18] merge develop --- .../java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java | 3 ++- .../src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 85dafbb2058..acf6bbcc332 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -23,7 +23,7 @@ import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.SailReadOnlyException; -import org.eclipse.rdf4j.sail.base.*; +import org.eclipse.rdf4j.sail.base.SailSourceConnection; import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent; import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; @@ -206,6 +206,7 @@ protected void closeInternal() throws SailException { // release thread-local pool Pool.release(); } + @Override protected long sizeInternal(Resource... contexts) throws SailException { return super.getSizeFromSnapshot(false, contexts); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index cccd56c05b3..c0727c4f17b 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -15,7 +15,6 @@ import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.readTransaction; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.transaction; import static org.eclipse.rdf4j.sail.lmdb.Varint.readQuadUnsigned; -import static org.lwjgl.system.MemoryStack.stackGet; import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryUtil.NULL; import static org.lwjgl.util.lmdb.LMDB.MDB_CREATE; From 1eec02bc535b6d2246b6601b445c30e382335237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sat, 4 Oct 2025 21:04:36 +0200 Subject: [PATCH 03/18] fixes --- 
.../org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java | 2 ++ .../main/java/org/eclipse/rdf4j/sail/base/SailDataset.java | 5 +++-- .../java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java index ad9b963872c..51003a8264a 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java @@ -13,6 +13,7 @@ import java.util.Comparator; import java.util.Set; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.order.StatementOrder; import org.eclipse.rdf4j.model.IRI; @@ -94,6 +95,7 @@ public Comparator getComparator() { return delegate.getComparator(); } + @Experimental @Override public long size(Resource subj, IRI pred, Value obj, Resource... contexts) { return delegate.size(subj, pred, obj, contexts); diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java index 632071e28f5..38c19d6e3de 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java @@ -132,7 +132,8 @@ default Comparator getComparator() { @Experimental default long size(final Resource subj, final IRI pred, final Value obj, final Resource... 
contexts) { - return getStatements(subj, pred, obj, contexts).stream() - .count(); + try (CloseableIteration statements = getStatements(subj, pred, obj, contexts)) { + return statements.stream().count(); + } } } diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java index 5a7a27c8b42..9c8cdf759d5 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java @@ -21,6 +21,7 @@ import java.util.Set; import java.util.function.Function; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.AbstractCloseableIteration; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; @@ -383,6 +384,7 @@ private boolean isDeprecated(Triple triple, List deprecatedStatements return true; } + @Experimental @Override public long size(final Resource subj, final IRI pred, final Value obj, final Resource... 
contexts) { // Fast path: no approved or deprecated @@ -391,6 +393,8 @@ public long size(final Resource subj, final IRI pred, final Value obj, final Res } // Fallback path: iterate over all matching statements - return getStatements(subj, pred, obj, contexts).stream().count(); + try (CloseableIteration statements = getStatements(subj, pred, obj, contexts)) { + return statements.stream().count(); + } } } From 2678230311846ff19dc02e8aa538003f96752053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sat, 4 Oct 2025 21:13:28 +0200 Subject: [PATCH 04/18] tests --- .../rdf4j/sail/lmdb/CardinalityExactTest.java | 2 +- .../rdf4j/sail/lmdb/LmdbSailStoreTest.java | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java index deeda4be1a3..bf6a1700b12 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java @@ -37,7 +37,7 @@ public class CardinalityExactTest { public void before() throws Exception { File dataDir = new File(tempFolder, "triplestore"); dataDir.mkdir(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc")); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); } private long countTriples(RecordIterator iterator) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index 04e66f5197d..bfb9fdda9fd 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -106,6 +106,34 @@ public void testRemoveInvalidContext() { } } + @Test + 
public void testSizeNullContextCountsDefaultGraphOnly() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals("size(null) must count default graph only", 1, conn.size((Resource) null)); + } + } + + @Test + public void testSizeUnknownContextIsZero() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals("size(unknownCtx) must be zero", 0, conn.size(CTX_INV)); + } + } + + @Test + public void testSizeMixedValidAndUnknownSkipsUnknown() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals("size(valid,unknown) must equal size(valid)", 1, conn.size(CTX_1, CTX_INV)); + } + } + + @Test + public void testSizeNullAndValidCountsUnion() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals("size(null,valid) must count default + valid", 2, conn.size((Resource) null, CTX_1)); + } + } + @Test public void testRemoveMultipleValidContext() { try (RepositoryConnection conn = repo.getConnection()) { From 2c8474475fe9d2d3dcc44cb19474901c3ba7ade7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sat, 4 Oct 2025 21:18:47 +0200 Subject: [PATCH 05/18] fix --- .../rdf4j/sail/lmdb/LmdbSailStore.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index ae94ee9b423..0406e01e4f0 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -454,19 +454,27 @@ private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, } } - // Handle the case where no contexts are specified (query all contexts) + // Handle context selection mirroring getStatements semantics if (contexts.length == 0) { + // wildcard across all contexts totalSize = tripleStore.cardinalityExact(txn, 
subjID, predID, objID, LmdbValue.UNKNOWN_ID, false); } else { for (Resource context : contexts) { - long contextID = LmdbValue.UNKNOWN_ID; - if (context != null) { - contextID = valueStore.getId(context); - if (contextID == LmdbValue.UNKNOWN_ID) { - return 0; + Long contextIDToCount = null; + if (context == null) { + // default graph + contextIDToCount = 0L; + } else if (!context.isTriple()) { + long contextID = valueStore.getId(context); + // skip unknown (non-existent) contexts; do not early-return + if (contextID != LmdbValue.UNKNOWN_ID) { + contextIDToCount = contextID; } } - totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextID, false); + + if (contextIDToCount != null) { + totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextIDToCount, false); + } } } return totalSize; From 2e0e192fb525436047b837c26f90518971d6420f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sat, 4 Oct 2025 21:30:36 +0200 Subject: [PATCH 06/18] fix --- .../org/eclipse/rdf4j/sail/base/SailSourceConnection.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java index 4111264043a..6d2041831a5 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java @@ -1048,7 +1048,9 @@ private boolean hasStatement(SailDataset dataset, Resource subj, IRI pred, Value @Experimental protected long getSizeFromSnapshot(final boolean includeInferred, final Resource... 
contexts) throws SailException { try (SailSource branch = branch(IncludeInferred.fromBoolean(includeInferred))) { - return branch.dataset(getIsolationLevel()).size(null, null, null, contexts); + try (SailDataset dataset = branch.dataset(getIsolationLevel())) { + return dataset.size(null, null, null, contexts); + } } } From 67d173a1c5e5642e76ca519dd407a4549037472a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:09:51 +0200 Subject: [PATCH 07/18] fix --- .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 8 +- .../lmdb/LmdbContextSizeExplicitOnlyTest.java | 85 +++++++++++++++++++ 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index c0727c4f17b..af9a1071323 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -710,8 +710,12 @@ protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final } return cardinality; } else { - // Fast path: if only context is specified, return the size of the given context - return getContextSize(txn, stack, context); + // Fast path: if only context is specified. Only use the precomputed + // context size when including implicit statements; otherwise fall through + // and count explicit-only via iteration below. 
+ if (includeImplicit) { + return getContextSize(txn, stack, context); + } } } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java new file mode 100644 index 00000000000..110b23d85a4 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java @@ -0,0 +1,85 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.SailConnection; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Reproduces regression: context-size must be explicit-only. The LMDB fast path currently counts inferred statements + * for context-only size queries. 
+ */ +public class LmdbContextSizeExplicitOnlyTest { + + private Path tmpDir; + private LmdbStore store; + + @BeforeEach + public void setUp() throws IOException { + tmpDir = Files.createTempDirectory("rdf4j-lmdb-test-"); + store = new LmdbStore(tmpDir.toFile()); + store.init(); + } + + @AfterEach + public void tearDown() throws IOException { + if (store != null) { + store.shutDown(); + } + if (tmpDir != null) { + // best-effort cleanup + Files.walk(tmpDir) + .sorted((a, b) -> b.compareTo(a)) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (IOException ignore) { + } + }); + } + } + + @Test + public void sizeContext_excludesInferred() throws Exception { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + // one inferred in the same ctx (simulate inference via addInferredStatement) + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + conn.commit(); + + // size must exclude inferred statements + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } +} From 236b66f6ab0e7f0df199606fa44318479393c210 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:18:18 +0200 Subject: [PATCH 08/18] fix --- .../rdf4j/sail/lmdb/LmdbSailStore.java | 10 ++-- .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 8 ++- .../rdf4j/sail/lmdb/CardinalityExactTest.java | 3 +- .../rdf4j/sail/lmdb/CardinalityTest.java | 3 +- .../rdf4j/sail/lmdb/DefaultIndexTest.java | 7 +-- .../lmdb/LmdbContextSizeExplicitOnlyTest.java | 52 ++++++++++++++++++- 
.../sail/lmdb/RecordIteratorBenchmark.java | 3 +- .../sail/lmdb/TripleIndexToKeyCacheTest.java | 3 +- .../sail/lmdb/TripleStoreAutoGrowTest.java | 3 +- .../sail/lmdb/TripleStoreManyIndexesTest.java | 3 +- .../rdf4j/sail/lmdb/TripleStoreTest.java | 3 +- 11 files changed, 80 insertions(+), 18 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 0406e01e4f0..12d1f68f8fd 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -71,7 +71,7 @@ class LmdbSailStore implements SailStore { private boolean multiThreadingActive; private volatile boolean asyncTransactionFinished; private volatile boolean nextTransactionAsync; - private volatile boolean mayHaveInferred; + private final AtomicBoolean mayHaveInferred = new AtomicBoolean(); boolean enableMultiThreading = true; @@ -144,7 +144,7 @@ class AddQuadOperation implements Operation { @Override public void execute() throws IOException { if (!explicit) { - mayHaveInferred = true; + mayHaveInferred.setRelease(true); } if (!unusedIds.isEmpty()) { // these ids are used again @@ -196,8 +196,8 @@ public LmdbSailStore(File dataDir, LmdbStoreConfig config) throws IOException, S namespaceStore = new NamespaceStore(dataDir); var valueStore = new ValueStore(new File(dataDir, "values"), config); this.valueStore = valueStore; - tripleStore = new TripleStore(new File(dataDir, "triples"), config, valueStore); - mayHaveInferred = tripleStore.hasTriples(false); + tripleStore = new TripleStore(new File(dataDir, "triples"), config, valueStore, mayHaveInferred); + mayHaveInferred.setRelease(tripleStore.hasTriples(false)); initialized = true; } finally { if (!initialized) { @@ -353,7 +353,7 @@ protected void handleClose() throws SailException { */ CloseableIteration createStatementIterator( Txn txn, 
Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) throws IOException { - if (!explicit && !mayHaveInferred) { + if (!explicit && !mayHaveInferred.getAcquire()) { // there are no inferred statements and the iterator should only return inferred statements return CloseableIteration.EMPTY_STATEMENT_ITERATION; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index af9a1071323..a0c69b59e03 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -75,6 +75,7 @@ import java.util.Set; import java.util.StringTokenizer; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.LongAdder; import java.util.function.Consumer; @@ -162,6 +163,7 @@ class TripleStore implements Closeable { */ private final List indexes = new ArrayList<>(); private final ValueStore valueStore; + private final AtomicBoolean mayHaveInferred; private long env; private int contextsDbi; @@ -195,11 +197,13 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in } }; - TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException { + TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore, AtomicBoolean mayHaveInferred) + throws IOException, SailException { this.dir = dir; this.forceSync = config.getForceSync(); this.autoGrow = config.getAutoGrow(); this.valueStore = valueStore; + this.mayHaveInferred = mayHaveInferred; // create directory if it not exists this.dir.mkdirs(); @@ -713,7 +717,7 @@ protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final // Fast path: if only context is specified. 
Only use the precomputed // context size when including implicit statements; otherwise fall through // and count explicit-only via iteration below. - if (includeImplicit) { + if (includeImplicit || !mayHaveInferred.getAcquire()) { return getContextSize(txn, stack, context); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java index bf6a1700b12..f2654b0fa21 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java @@ -14,6 +14,7 @@ import java.io.File; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; @@ -37,7 +38,7 @@ public class CardinalityExactTest { public void before() throws Exception { File dataDir = new File(tempFolder, "triplestore"); dataDir.mkdir(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } private long countTriples(RecordIterator iterator) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java index 685db90c83a..5a53fcf6fb5 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java @@ -12,6 +12,7 @@ import java.io.File; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -38,7 +39,7 @@ public class CardinalityTest { public void before() throws Exception 
{ File dataDir = new File(tempFolder, "triplestore"); dataDir.mkdir(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } int count(RecordIterator it) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java index b1e8b23df7c..8cb6c36e1a4 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java @@ -16,6 +16,7 @@ import java.io.FileInputStream; import java.io.InputStream; import java.util.Properties; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.common.io.FileUtil; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -26,7 +27,7 @@ public class DefaultIndexTest { @Test public void testDefaultIndex(@TempDir File dir) throws Exception { - TripleStore store = new TripleStore(dir, new LmdbStoreConfig(), null); + TripleStore store = new TripleStore(dir, new LmdbStoreConfig(), null, new AtomicBoolean(false)); store.close(); // check that the triple store used the default index assertEquals("spoc,posc", findIndex(dir)); @@ -36,11 +37,11 @@ public void testDefaultIndex(@TempDir File dir) throws Exception { @Test public void testExistingIndex(@TempDir File dir) throws Exception { // set a non-default index - TripleStore store = new TripleStore(dir, new LmdbStoreConfig("spoc,opsc"), null); + TripleStore store = new TripleStore(dir, new LmdbStoreConfig("spoc,opsc"), null, new AtomicBoolean(false)); store.close(); String before = findIndex(dir); // check that the index is preserved with a null value - store = new TripleStore(dir, new LmdbStoreConfig(null), null); + store = new TripleStore(dir, new LmdbStoreConfig(null), null, new AtomicBoolean(false)); store.close(); 
assertEquals(before, findIndex(dir)); FileUtil.deleteDir(dir); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java index 110b23d85a4..ca115d889b8 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java @@ -59,7 +59,7 @@ public void tearDown() throws IOException { } @Test - public void sizeContext_excludesInferred() throws Exception { + public void sizeContext_excludesInferred() { try (SailConnection raw = store.getConnection()) { LmdbStoreConnection conn = (LmdbStoreConnection) raw; conn.begin(); @@ -78,6 +78,56 @@ public void sizeContext_excludesInferred() throws Exception { long contextSize = conn.size(ctx); assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + } + } + + @Test + public void sizeContext_excludesInferred2() { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + // one inferred in the same ctx (simulate inference via addInferredStatement) + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + + // size must exclude inferred statements + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + conn.commit(); + } + } + + 
@Test + public void sizeContext() { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.commit(); + + // size must exclude inferred statements + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + long totalSize = conn.size(); assertEquals(1L, totalSize, "total size must exclude inferred statements"); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java index db6d2da0486..491e1b54b0d 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java @@ -14,6 +14,7 @@ import java.io.File; import java.io.IOException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.io.FileUtils; import org.assertj.core.util.Files; @@ -56,7 +57,7 @@ public class RecordIteratorBenchmark { @Setup(Level.Trial) public void setup() throws IOException { dataDir = Files.newTemporaryFolder(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); final int statements = 1_000_000; tripleStore.startTransaction(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java index 0e82f5246fe..47c69844b07 100644 --- 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java @@ -14,6 +14,7 @@ import java.io.File; import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; @@ -32,7 +33,7 @@ class TripleIndexToKeyCacheTest { @BeforeEach void setup(@TempDir File dataDir) throws Exception { // Create a small store; index set is irrelevant for constructing standalone TripleIndex instances - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } @AfterEach diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java index afcfc5e64b0..bfb46a1469c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java @@ -12,6 +12,7 @@ import java.io.File; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -34,7 +35,7 @@ public class TripleStoreAutoGrowTest { public void before(@TempDir File dataDir) throws Exception { var config = new LmdbStoreConfig("spoc,posc"); config.setTripleDBSize(4096 * 10); - tripleStore = new TripleStore(dataDir, config, null); + tripleStore = new TripleStore(dataDir, config, null, new AtomicBoolean(false)); ((Logger) LoggerFactory .getLogger(TripleStore.class.getName())) .setLevel(ch.qos.logback.classic.Level.DEBUG); diff --git 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java index f6e7ca850a9..53765c7f088 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertNotNull; import java.io.File; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.BeforeEach; @@ -33,7 +34,7 @@ public void before(@TempDir File dataDir) throws Exception { @Test public void testSixIndexes() throws Exception { TripleStore tripleStore = new TripleStore(dataDir, - new LmdbStoreConfig("spoc,posc,ospc,cspo,cpos,cosp"), null); + new LmdbStoreConfig("spoc,posc,ospc,cspo,cpos,cosp"), null, new AtomicBoolean(false)); tripleStore.startTransaction(); tripleStore.storeTriple(1, 2, 3, 1, true); tripleStore.commit(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java index 336c22b9378..80febc408f0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java @@ -16,6 +16,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; @@ -33,7 +34,7 @@ public class TripleStoreTest { @BeforeEach public void before(@TempDir File dataDir) throws Exception { - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } 
int count(RecordIterator it) { From 905f740781880d5a1bc112fbe00df571e30b8af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:24:21 +0200 Subject: [PATCH 09/18] wip --- AGENTS_temp.md | 720 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 720 insertions(+) create mode 100644 AGENTS_temp.md diff --git a/AGENTS_temp.md b/AGENTS_temp.md new file mode 100644 index 00000000000..af461b6082d --- /dev/null +++ b/AGENTS_temp.md @@ -0,0 +1,720 @@ +# AGENTS.md + +Welcome, AI Agent! Your persistence, curiosity, and craftsmanship make a difference. Take your time, work methodically, validate thoroughly, and iterate. This repository is large and tests can take time — that’s expected and supported. + +You need to read the entire AGENTS.md file and follow all instructions exactly. Keep this fresh in your context as you work. + +> **Timebox:** Aim to complete each autonomous run in **15–30 minutes**. + +--- + +## Read‑Me‑Now: Proportional Test‑First Rule (Default) + +**Default:** Use **test‑first (TDD)** for any change that alters externally observable behavior. + +**Proportional exceptions:** You may **skip writing a new failing test** *only* when **all** Routine B gates (below) pass, or when using Routine C (Spike/Investigate) with **no production code changes**. + +**You may not touch production code for behavior‑changing work until a smallest‑scope failing automated test exists inside this repo and you have captured its report snippet.** A user‑provided stack trace or “obvious” contract violation is **not** a substitute for an in‑repo failing test. + +**Auto‑stop:** If you realize you patched production before creating/observing the failing test for behavior‑changing work, **stop**, revert the patch, and resume from “Reproduce first”. + +**Traceability trio (must appear in your handoff):** +1. **Description** (what you’re about to do) +2. **Evidence** (Surefire/Failsafe snippet from this repo) +3. 
**Plan** (one and only one `in_progress` step) + +It is illegal to `-am` when running tests! +It is illegal to `-q` when running tests! + +> **Clarification:** For **strictly behavior‑neutral refactors** that are already **fully exercised by existing tests**, or for **bugfixes with an existing failing test**, you may use **Routine B — Change without new tests**. In that case you must capture **pre‑change passing evidence** at the smallest scope that hits the code you’re about to edit, prove **Hit Proof**, then show **post‑change passing evidence** from the **same selection**. +> **No exceptions for any behavior‑changing change** — for those, you must follow **Routine A — Full TDD**. + +--- + +## Three Routines: Choose Your Path + +**Routine A — Full TDD (Default)** +**Routine B — Change without new tests (Proportional, gated)** +**Routine C — Spike/Investigate (No production changes)** + +### Decision quickstart + +1. **Is new externally observable behavior required?** + → **Yes:** **Routine A (Full TDD)**. Add the smallest failing test first. + → **No:** continue. + +2. **Does a failing test already exist in this repo that pinpoints the issue?** + → **Yes:** **Routine B (Bugfix using existing failing test).** + → **No:** continue. + +3. **Is the edit strictly behavior‑neutral, local in scope, and clearly hit by existing tests?** + → **Yes:** **Routine B (Refactor/micro‑perf/documentation/build).** + → **No or unsure:** continue. + +4. **Is this purely an investigation/design spike with no production code changes?** + → **Yes:** **Routine C (Spike/Investigate).** + → **No or unsure:** **Routine A.** + +**When in doubt, choose Routine A (Full TDD).** Ambiguity is risk; tests are insurance. + +--- + +## Proportionality Model (Think before you test) + +Score the change on these lenses. If any are **High**, prefer **Routine A**. + +- **Behavioral surface:** affects outputs, serialization, parsing, APIs, error text, timing/order? 
+- **Blast radius:** number of modules/classes touched; public vs internal. +- **Reversibility:** quick revert vs migration/data change. +- **Observability:** can existing tests or assertions expose regressions? +- **Coverage depth:** do existing tests directly hit the edited code? +- **Concurrency / IO / Time:** any risk here is **High** by default. + +--- + +## Purpose & Contract + +* **Bold goal:** deliver correct, minimal, well‑tested changes with clear handoff. Fix root causes; avoid hacks. +* **Bias to action:** when inputs are ambiguous, choose a reasonable path, state assumptions, and proceed. +* **Ask only when blocked or irreversible:** permissions, missing deps, conflicting requirements, destructive repo‑wide changes. +* **Definition of Done** + * Code formatted and imports sorted. + * Compiles with a quick profile / targeted modules. + * Relevant module tests pass; failures triaged or crisply explained. + * Only necessary files changed; headers correct for new files. + * Clear final summary: what changed, why, where, how verified, next steps. + * **Evidence present:** failing test output (pre‑fix) and passing output (post‑fix) are shown for Routine A; for Routine B show **pre/post green** from the **same selection** plus **Hit Proof**. + +### No Monkey‑Patching or Band‑Aid Fixes (Non‑Negotiable) + +Durable, root‑cause fixes only. No muting tests, no broad catch‑and‑ignore, no widening APIs “to make green”. + +**Strictly avoid** +* Sleeping/timeouts to hide flakiness. +* Swallowing exceptions or weakening assertions. +* Reflection/internal state manipulation to bypass interfaces. +* Feature flags that disable validation instead of fixing logic. +* Changing public APIs/configs without necessity tied to root cause. + +**Preferred approach** +* Reproduce the issue and isolate the smallest failing test (class → method). +* Trace to the true source; fix in the right module. 
+* Add focused tests for behavior/edge cases (Routine A) or prove coverage/neutrality (Routine B). +* Run tight, targeted verifies; broaden only if needed. + +--- + +## Enforcement & Auto‑Fail Triggers + +Your run is **invalid** and must be restarted from “Reproduce first” if any occur: + +* You modify production code before adding and running the smallest failing test in this repo **for behavior‑changing work**. +* You proceed without pasting a Surefire/Failsafe report snippet from `target/*-reports/`. +* Your plan does not have **exactly one** `in_progress` step. +* You run tests using `-am` or `-q`. +* You treat a narrative failure description or external stack trace as equivalent to an in‑repo failing test. +* **Routine B specific:** you cannot demonstrate that existing tests exercise the edited code (**Hit Proof**), or you fail to capture both pre‑ and post‑change **matching** passing snippets from the same selection. +* **Routine C breach:** you change production code while in a spike. + +**Recovery procedure:** +Update the plan (`in_progress: create failing test`), post a description of your next step, create the failing test, run it, capture the report snippet, then resume. +For Routine B refactors: if any gate fails, **switch to Full TDD** and add the smallest failing test. + +--- + +## Evidence Protocol (Mandatory) + +After each grouped action, post an **Evidence block**, then continue working: + +**Evidence template** +``` +Evidence: +Command: mvn -o -pl <module> -Dtest=Class#method verify +Report: <module>/target/surefire-reports/<TestClass>.txt +Snippet: +\<excerpt from the report file> +``` + +**Routine B additions** +* **Pre‑green:** capture a pre‑change **passing** snippet from the **most specific** test selection that hits your code (ideally a class or method). 
+* **Hit Proof (choose one):** + * An existing test class/method that directly calls the edited class/method, plus a short `rg -n` snippet showing the call site; **or** + * A Surefire/Failsafe output line containing the edited class/method names; **or** + * A temporary assertion or deliberate, isolated failing check in a **scratch test** proving the path is executed (then remove). +* **Post‑green:** after the patch, re‑run the **same selection** and capture a passing snippet. + +--- + +### Initial Evidence Capture (Required) + +To avoid losing the first test evidence when later runs overwrite `target/*-reports/`, immediately persist the initial verify results to a top‑level `initial-evidence.txt` file. + +• On a fully green verify run: + +- Capture and store the last 200 lines of the Maven verify output. +- Example (module‑scoped): + - `mvn -o -pl verify | tee .initial-verify.log` + - `tail -200 .initial-verify.log > initial-evidence.txt` + +• On any failing verify run (unit or IT failures): + +- Concatenate the Surefire and/or Failsafe report text files into `initial-evidence.txt`. +- Example (repo‑root): + - `find . -type f \( -path "*/target/surefire-reports/*.txt" -o -path "*/target/failsafe-reports/*.txt" \) -print0 | xargs -0 cat > initial-evidence.txt` + +Notes + +- Keep `initial-evidence.txt` at the repository root alongside your final handoff. +- Do not rely on `target/*-reports/` for the final report; they may be overwritten by subsequent runs. +- Continue to include the standard Evidence block(s) in your messages as usual. + +--- + +## Living Plan Protocol (Sharper) + +Maintain a **living plan** with checklist items (5–7 words each). Keep **exactly one** `in_progress`. 
+ +**Plan format** +``` + +Plan + +* \[done] sanity build quick profile +* \[in\_progress] add smallest failing test +* \[todo] minimal root-cause fix +* \[todo] rerun focused then module tests +* \[todo] format, verify, summary + +```` + +**Rule:** If you deviate, update the plan **first**, then proceed. + +--- + +## Environment + +* **JDK:** 11 (minimum). The project builds and runs on Java 11+. +* **Maven default:** run **offline** using `-o` whenever possible. +* **Network:** only to fetch missing deps/plugins; then rerun once without `-o`, and return offline. +* **Large project:** some module test suites can take **5–10 minutes**. Prefer **targeted** runs. + +### Maven `-am` usage (house rule) + +`-am` is helpful for **compiles**, hazardous for **tests**. + +* ✅ Use `-am` **only** for compile/verify with tests skipped (e.g. `-Pquick`): + * `mvn -o -pl -am -Pquick install` +* ❌ Do **not** use `-am` with `verify` when tests are enabled. + +**Two-step pattern (fast + safe)** +1. **Compile deps fast (skip tests):** + `mvn -o -pl -am -Pquick install` +2. **Run tests:** + `mvn -o -pl verify | tail -500` + +It is illegal to `-am` when running tests! +It is illegal to `-q` when running tests! + +--- + +## Always Install Before Tests (Required) + +The Maven reactor resolves inter-module dependencies from the local Maven repository (`~/.m2/repository`). +Running `install` publishes your changed modules there so downstream modules and tests pick up the correct versions. + +* Always run `mvn -o -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a smaller timeout than 30,000 ms. +* Always run `mvn -o -Pquick install | tail -200` before any `verify` or test runs. +* If offline resolution fails due to a missing dependency or plugin, rerun the exact `install` command once without `-o`, then return offline. 
+* Skipping this step can lead to stale or missing artifacts during tests, producing confusing compilation or linkage errors. +* Never ever change the repo location. Never use `-Dmaven.repo.local=.m2_repo`. +* Always try to run these commands first to see if they run without needing any approvals from the user w.r.t. the sandboxing. + +Why this is mandatory + +- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the local `~/.m2/repository` instead. +- Therefore, tests only see whatever versions were last published to `~/.m2`. If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `~/.m2` first. +- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to `~/.m2` before running any tests: run `mvn -o -Pquick install` at the repository root. +- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Pquick install` so the latest jars are available to the reactor from `~/.m2`. +--- + +## Quick Start (First 10 Minutes) + +1. **Discover** + * Inspect root `pom.xml` and module tree (see “Maven Module Overview”). + * Search fast with ripgrep: `rg -n ""` +2. **Build sanity (fast, skip tests)** + * `mvn -o -Pquick install | tail -200` +3. **Format (Java, imports, XML)** + * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` +4. **Targeted tests (tight loops)** + * Module: `mvn -o -pl verify | tail -500` + * Class: `mvn -o -pl -Dtest=ClassName verify | tail -500` + * Method: `mvn -o -pl -Dtest=ClassName#method verify | tail -500` +5. 
**Inspect failures** + * **Unit (Surefire):** `/target/surefire-reports/` + * **IT (Failsafe):** `/target/failsafe-reports/` + +It is illegal to `-am` when running tests! +It is illegal to `-q` when running tests! + +--- + +## Routine A — Full TDD (Default) + +> Use for **all behavior‑changing work** and whenever Routine B gates do not all pass. + +### Bugfix Workflow (Mandatory) + +* **Reproduce first:** write the smallest focused test (class/method) that reproduces the reported bug **inside this repo**. Confirm it fails. +* **Keep the test as‑is:** do not weaken assertions or mute the failure. +* **Fix at the root:** minimal, surgical change in the correct module. +* **Verify locally:** re‑run the focused test, then the module’s tests. Avoid `-am`/`-q` with tests. +* **Broaden if needed:** expand scope only after targeted greens. +* **Document clearly:** failing output (pre‑fix), root cause, minimal fix, passing output (post‑fix). + +### Hard Gates + +* A failing test exists at the smallest scope (method/class). +* **No production patch before the failing test is observed and recorded.** +* Test runs avoid `-am` and `-q`. + +--- + +## Routine B — Change without new tests (Proportional, gated) + +> Use **only** when at least one Allowed Case applies **and** all Routine B **Gates** pass. + +### Allowed cases (one or more) +1. **Bugfix with existing failing test** in this repo (pinpoints class/method). +2. **Strictly behavior‑neutral refactor / cleanup / micro‑perf** with clear existing coverage hitting the edited path. +3. **Migration/rename/autogen refresh** where behavior is already characterized by existing tests. +4. **Build/CI/docs/logging/message changes** that do not alter runtime behavior or asserted outputs. +5. **Data/resource tweaks** not asserted by tests and not affecting behavior. + +### Routine B Gates (all must pass) +- **Neutrality/Scope:** No externally observable behavior change. Localized edit. 
+- **Hit Proof:** Demonstrate tests exercise the edited code. +- **Pre/Post Green Match:** Same smallest‑scope selection, passing before and after. +- **Risk Check:** No concurrency/time/IO semantics touched; no public API, serialization, parsing, or ordering changes. +- **Reversibility:** Change is easy to revert if needed. + +**If any gate fails → switch to Routine A.** + +--- + +## Routine C — Spike / Investigate (No production changes) + +> Use for exploration, triage, design spikes, and measurement. **No production code edits.** + +**You may:** +- Add temporary scratch tests, assertions, scripts, or notes. +- Capture measurements, traces, logs. + +**Hand‑off must include:** +- Description, commands, and artifacts (logs/notes). +- Findings, options, and a proposed next routine (A or B). +- Removal of any temporary code if not adopted. + +--- + +## Where to Draw the Line — A Short Debate + +> **Purist:** “All changes must start with a failing test.” +> **Pragmatist:** “For refactors that can’t fail first without faking it, prove coverage and equality of behavior.” + +**In‑scope for Routine B (examples)** +* Rename private methods; extract helper; dead‑code removal. +* Replace straightforward loop with stream (same results, same ordering). +* Tighten generics/nullability/annotations without observable change. +* Micro‑perf cache within a method with deterministic inputs and strong coverage. +* Logging/message tweaks **not** asserted by tests. +* Build/CI config that doesn’t alter runtime behavior. + +**Out‑of‑scope (use Routine A)** +* Changing query results, serialization, or parsing behavior. +* Altering error messages that tests assert. +* Anything touching concurrency, timeouts, IO, or ordering. +* New SPARQL function support or extended syntax (even “tiny”). +* Public API changes or cross‑module migrations with unclear blast radius. + +--- + +## Working Loop + +* **Plan:** small, verifiable steps; keep one `in_progress`. 
+* **Change:** minimal, surgical edits; keep style/structure consistent. +* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast):** `mvn -o -pl -am -Pquick install | tail -500` +* **Test:** start smallest (class/method → module). For integration, run module `verify`. +* **Triage:** read reports; fix root cause; expand scope only when needed. +* **Iterate:** keep momentum; escalate only when blocked or irreversible. + +It is illegal to `-am` when running tests! +It is illegal to `-q` when running tests! + +--- + +## Testing Strategy + +* **Prefer module tests you touched:** `-pl ` +* **Narrow further** to a class/method; then broaden to the module. +* **Expand scope** when changes cross boundaries or neighbor modules fail. +* **Read reports** + * Surefire (unit): `target/surefire-reports/` + * Failsafe (IT): `target/failsafe-reports/` +* **Helpful flags** + * `-Dtest=Class#method` (unit selection) + * `-Dit.test=ITClass#method` (integration selection) + * `-DtrimStackTrace=false` (full traces) + * `-DskipITs` (focus on unit tests) + * `-DfailIfNoTests=false` (when selecting a class that has no tests on some platforms) + +### Optional: Redirect test stdout/stderr to files +```bash +mvn -o -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 +```` + +Logs under: + +``` +/target/surefire-reports/ClassName-output.txt +``` + +(Use similarly for Failsafe via `-Dit.test=`.) + +--- + +## Assertions: Make invariants explicit + +Assertions are executable claims about what must be true. Use **temporary tripwires** during investigation and **permanent contracts** once an invariant matters. + +* One fact per assert; fail fast and usefully. +* Include stable context in messages; avoid side effects. +* Keep asserts cheap; don’t replace user input validation with asserts. + +**Java specifics** + +* Enable VM assertions in tests (`-ea`). 
+* Use exceptions for runtime guarantees; `assert` for “cannot happen”. + +(Concrete examples omitted here for brevity; keep your current patterns.) + +--- + +## Triage Playbook + +* **Missing dep/plugin offline:** rerun the exact command once **without** `-o`, then return offline. +* **Compilation errors:** fix imports/generics/visibility; quick install in the module. +* **Flaky/slow tests:** run the specific failing test; stabilize root cause before broad runs. +* **Formatting failures:** run formatter/import/XML sort; re‑verify. +* **License header missing:** add for **new** files only; do not change years on existing files. + +--- + +## Code Formatting + +* Always run before finalizing: + + * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* Style: no wildcard imports; 120‑char width; curly braces always; LF endings. + +--- + +## Source File Headers + +Use this exact header for **new Java files only** (replace `${year}` with current year): + +``` +/******************************************************************************* + * Copyright (c) ${year} Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +``` + +Do **not** modify existing headers’ years. + +--- + +## Pre‑Commit Checklist + +* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast path):** `mvn -o -Pquick install | tail -200` +* **Tests (targeted):** `mvn -o -pl verify | tail -500` (broaden as needed) +* **Reports:** zero new failures in Surefire/Failsafe, or explain precisely. +* **Evidence:** Routine A — failing pre‑fix + passing post‑fix. 
+ Routine B — **pre/post green** from same selection + **Hit Proof**. + +--- + +## Branching & Commit Conventions + +* Branch names: start with `GH-XXXX` (GitHub issue number). Optional short slug, e.g., `GH-1234-trig-writer-check`. +* Commit messages: `GH-XXXX ` on every commit. + +--- + +## Branch & PR Workflow (Agent) + +* Confirm issue number first (mandatory). +* Branch: `git checkout -b GH-XXXX-your-slug` +* Stage: `git add -A` (ensure new Java files have the required header). +* Optional: formatter + quick install. +* Commit: `git commit -m "GH-XXXX "` +* Push & PR: use the default template; fill all fields; include `Fixes #XXXX`. + +--- + +## Navigation & Search + +* Files: `rg --files` +* Content: `rg -n ""` +* Read big files in chunks: + + * `sed -n '1,200p' path/to/File.java` + * `sed -n '201,400p' path/to/File.java` + +--- + +## Autonomy Rules (Act > Ask) + +* **Default:** act with assumptions; document them. +* **Keep going:** chain steps; short progress updates before long actions. +* **Ask only when:** blocked by sandbox/approvals/network, or change is destructive/irreversible, or impacts public APIs/dependencies/licensing. +* **Prefer reversible moves:** smallest local change that unblocks progress; validate with targeted tests first. + +**Defaults** + +* **Tests:** start with `-pl `, then `-Dtest=Class#method` / `-Dit.test=ITClass#method`. +* **Build:** use `-o`; drop `-o` once only to fetch; return offline. +* **Formatting:** run formatter/import/XML before verify. +* **Reports:** read surefire/failsafe locally; expand scope only when necessary. + +--- + +## Answer Template (Use This) + +* **What changed:** summary of approach and rationale. +* **Files touched:** list file paths. +* **Commands run:** key build/test commands. +* **Verification:** which tests passed, where you checked reports. +* **Evidence:** + *Routine A:* failing output (pre‑fix) and passing output (post‑fix). 
+ *Routine B:* pre‑ and post‑green snippets from the **same selection** + **Hit Proof**. + *Routine C:* artifacts from investigation (logs/notes/measurements) and proposed next steps. +* **Assumptions:** key assumptions and autonomous decisions. +* **Limitations:** anything left or risky edge cases. +* **Next steps:** optional follow‑ups. + +--- + +## Running Tests + +* By module: `mvn -o -pl core/sail/shacl verify | tail -500` +* Entire repo: `mvn -o verify` (long; only when appropriate) +* Slow tests (entire repo): + `mvn -o verify -PslowTestsOnly,-skipSlowTests | tail -500` +* Slow tests (by module): + `mvn -o -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` +* Slow tests (specific test): + + * `mvn -o -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` +* Integration tests (entire repo): + `mvn -o verify -PskipUnitTests | tail -500` +* Integration tests (by module): + `mvn -o -pl verify -PskipUnitTests | tail -500` +* Useful flags: + + * `-Dtest=ClassName` + * `-Dtest=ClassName#method` + * `-Dit.test=ITClass#method` + * `-DtrimStackTrace=false` + +--- + +## Build + +* **Build without tests (fast path):** + `mvn -o -Pquick install` +* **Verify with tests:** + Targeted module(s): `mvn -o -pl verify` + Entire repo: `mvn -o verify` (use judiciously) +* **When offline fails due to missing deps:** + Re‑run the **exact** command **without** `-o` once to fetch, then return to `-o`. + +--- + +## Using JaCoCo (Coverage) + +JaCoCo is configured via the `jacoco` Maven profile in the root POM. Surefire/Failsafe honor the prepared agent `argLine`, so no extra flags are required beyond `-Pjacoco`. 
+ +- Run with coverage + - Module: `mvn -o -pl -Pjacoco verify | tail -500` + - Class: `mvn -o -pl -Pjacoco -Dtest=ClassName verify | tail -500` + - Method: `mvn -o -pl -Pjacoco -Dtest=ClassName#method verify | tail -500` + +- Where to find reports (per module) + - Exec data: `/target/jacoco.exec` + - HTML report: `/target/site/jacoco/index.html` + - XML report: `/target/site/jacoco/jacoco.xml` + +- Check if a specific test covers code X + - Run only that test (class or method) with `-Dtest=...` (see above) and `-Pjacoco`. + - Open the HTML report and navigate to the class/method of interest; non-zero line/branch coverage indicates the selected test touched it. + - For multiple tests, run them in small subsets to localize coverage quickly. + +- Troubleshooting + - If you see “Skipping JaCoCo execution due to missing execution data file”, ensure you passed `-Pjacoco` and ran the install step first. + - If offline resolution fails for the JaCoCo plugin, rerun the exact command once without `-o`, then return offline. + +- Notes + - The default JaCoCo reports do not list “which individual tests” hit each line. Use single-test runs to infer per-test coverage. If you need true per-test mapping, add a JUnit 5 extension that sets a JaCoCo session per test and writes per-test exec files. + - Do not use `-am` when running tests; keep runs targeted by module/class/method. + +--- + +## Prohibited Misinterpretations + +* A user stack trace, reproduction script, or verbal description **is not evidence** for behavior‑changing work. You must implement the smallest failing test **inside this repo**. +* For Routine B, a stack trace is neither required nor sufficient; **Hit Proof** plus **pre/post green** snippets are mandatory. +* Routine C must not change production code. + +--- + +## Maven Module Overview + +The project is organised as a multi-module Maven build. The diagram below lists +all modules and submodules with a short description for each. 
+ +``` +rdf4j: root project +├── assembly-descriptors: RDF4J: Assembly Descriptors +├── core: Core modules for RDF4J + ├── common: RDF4J common: shared classes + │ ├── annotation: RDF4J common annotation classes + │ ├── exception: RDF4J common exception classes + │ ├── io: RDF4J common IO classes + │ ├── iterator: RDF4J common iterators + │ ├── order: Order of vars and statements + │ ├── text: RDF4J common text classes + │ ├── transaction: RDF4J common transaction classes + │ └── xml: RDF4J common XML classes + ├── model-api: RDF model interfaces. + ├── model-vocabulary: Well-Known RDF vocabularies. + ├── model: RDF model implementations. + ├── sparqlbuilder: A fluent SPARQL query builder + ├── rio: Rio (RDF I/O) is an API for parsers and writers of various RDF file formats. + │ ├── api: Rio API. + │ ├── languages: Rio Language handler implementations. + │ ├── datatypes: Rio Datatype handler implementations. + │ ├── binary: Rio parser and writer implementation for the binary RDF file format. + │ ├── hdt: Experimental Rio parser and writer implementation for the HDT file format. + │ ├── jsonld-legacy: Rio parser and writer implementation for the JSON-LD file format. + │ ├── jsonld: Rio parser and writer implementation for the JSON-LD file format. + │ ├── n3: Rio writer implementation for the N3 file format. + │ ├── nquads: Rio parser and writer implementation for the N-Quads file format. + │ ├── ntriples: Rio parser and writer implementation for the N-Triples file format. + │ ├── rdfjson: Rio parser and writer implementation for the RDF/JSON file format. + │ ├── rdfxml: Rio parser and writer implementation for the RDF/XML file format. + │ ├── trix: Rio parser and writer implementation for the TriX file format. + │ ├── turtle: Rio parser and writer implementation for the Turtle file format. + │ └── trig: Rio parser and writer implementation for the TriG file format. + ├── queryresultio: Query result IO API and implementations. 
+ │ ├── api: Query result IO API + │ ├── binary: Query result parser and writer implementation for RDF4J's binary query results format. + │ ├── sparqljson: Query result writer implementation for the SPARQL Query Results JSON Format. + │ ├── sparqlxml: Query result parser and writer implementation for the SPARQL Query Results XML Format. + │ └── text: Query result parser and writer implementation for RDF4J's plain text boolean query results format. + ├── query: Query interfaces and implementations + ├── queryalgebra: Query algebra model and evaluation. + │ ├── model: A generic query algebra for RDF queries. + │ ├── evaluation: Evaluation strategy API and implementations for the query algebra model. + │ └── geosparql: Query algebra implementations to support the evaluation of GeoSPARQL. + ├── queryparser: Query parser API and implementations. + │ ├── api: Query language parsers API. + │ └── sparql: Query language parser implementation for SPARQL. + ├── http: Client and protocol for repository communication over HTTP. + │ ├── protocol: HTTP protocol (REST-style) + │ └── client: Client functionality for communicating with an RDF4J server over HTTP. + ├── queryrender: Query Render and Builder tools + ├── repository: Repository API and implementations. + │ ├── api: API for interacting with repositories of RDF data. + │ ├── manager: Repository manager + │ ├── sail: Repository that uses a Sail stack. + │ ├── dataset: Implementation that loads all referenced datasets into a wrapped repository + │ ├── event: Implementation that notifies listeners of events on a wrapped repository + │ ├── http: "Virtual" repository that communicates with a (remote) repository over the HTTP protocol. + │ ├── contextaware: Implementation that allows default values to be set on a wrapped repository + │ └── sparql: The SPARQL Repository provides a RDF4J Repository interface to any SPARQL end-point. + ├── sail: Sail API and implementations. 
+ │ ├── api: RDF Storage And Inference Layer ("Sail") API. + │ ├── base: RDF Storage And Inference Layer ("Sail") API. + │ ├── inferencer: Stackable Sail implementation that adds RDF Schema inferencing to an RDF store. + │ ├── memory: Sail implementation that stores data in main memory, optionally using a dump-restore file for persistence. + │ ├── nativerdf: Sail implementation that stores data directly to disk in dedicated file formats. + │ ├── model: Sail implementation of Model. + │ ├── shacl: Stacked Sail with SHACL validation capabilities + │ ├── lmdb: Sail implementation that stores data to disk using LMDB. + │ ├── lucene-api: StackableSail API offering full-text search on literals, based on Apache Lucene. + │ ├── lucene: StackableSail implementation offering full-text search on literals, based on Apache Lucene. + │ ├── solr: StackableSail implementation offering full-text search on literals, based on Solr. + │ ├── elasticsearch: StackableSail implementation offering full-text search on literals, based on Elastic Search. + │ ├── elasticsearch-store: Store for utilizing Elasticsearch as a triplestore. + │ └── extensible-store: Store that can be extended with a simple user-made backend. + ├── spin: SPARQL input notation interfaces and implementations + ├── client: Parent POM for all RDF4J parsers, APIs and client libraries + ├── storage: Parent POM for all RDF4J storage and inferencing libraries + └── collection-factory: Collection Factories that may be reused for RDF4J + ├── api: Evaluation + ├── mapdb: Evaluation + └── mapdb3: Evaluation +├── tools: Server, Workbench, Console and other end-user tools for RDF4J. + ├── config: RDF4J application configuration classes + ├── console: Command line user interface to RDF4J repositories. 
+ ├── federation: A federation engine for virtually integrating SPARQL endpoints + ├── server: HTTP server implementing a REST-style protocol + ├── server-spring: HTTP server implementing a REST-style protocol + ├── workbench: Workbench to interact with RDF4J servers. + ├── runtime: Runtime dependencies for an RDF4J application + └── runtime-osgi: OSGi Runtime dependencies for an RDF4J application +├── spring-components: Components to use with Spring + ├── spring-boot-sparql-web: HTTP server component implementing only the SPARQL protocol + ├── rdf4j-spring: Spring integration for RDF4J + └── rdf4j-spring-demo: Demo of a spring-boot project using an RDF4J repo as its backend +├── testsuites: Test suites for Eclipse RDF4J modules + ├── model: Reusable tests for Model API implementations + ├── rio: Test suite for Rio + ├── queryresultio: Reusable tests for QueryResultIO implementations + ├── sparql: Test suite for the SPARQL query language + ├── repository: Reusable tests for Repository API implementations + ├── sail: Reusable tests for Sail API implementations + ├── lucene: Generic tests for Lucene Sail implementations. + ├── geosparql: Test suite for the GeoSPARQL query language + └── benchmark: RDF4J: benchmarks +├── compliance: Eclipse RDF4J compliance and integration tests + ├── repository: Compliance testing for the Repository API implementations + ├── rio: Tests for parsers and writers of various RDF file formats. + ├── model: RDF4J: Model compliance tests + ├── sparql: Tests for the SPARQL query language implementation + ├── lucene: Compliance Tests for LuceneSail. + ├── solr: Tests for Solr Sail. + ├── elasticsearch: Tests for Elasticsearch. + └── geosparql: Tests for the GeoSPARQL query language implementation +├── examples: Examples and HowTos for use of RDF4J in Java +├── bom: RDF4J Bill of Materials (BOM) +└── assembly: Distribution bundle assembly +``` + +## Safety & Boundaries + +* Don’t commit or push unless explicitly asked. 
+* Don’t add new dependencies without explicit approval. + +It is illegal to `-am` when running tests! +It is illegal to `-q` when running tests! +You must follow these rules and instructions exactly as stated. From 6cc551822ce338de28fb4d64f8617ab743c00bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:27:31 +0200 Subject: [PATCH 10/18] test --- .../sail/base/SailDatasetImplSizeTest.java | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java diff --git a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java new file mode 100644 index 00000000000..619786bdb49 --- /dev/null +++ b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java @@ -0,0 +1,103 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.base; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Namespace; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.sail.SailException; +import org.junit.jupiter.api.Test; + +/** + * Verifies that SailDatasetImpl.size respects a pending clear() operation (statementCleared), and does not delegate to + * the backing dataset when cleared with no contexts. + */ +public class SailDatasetImplSizeTest { + + /** + * Minimal backing dataset that reports a fixed size regardless of arguments. + */ + private static final class FixedSizeDataset implements SailDataset { + private final long size; + + private FixedSizeDataset(long size) { + this.size = size; + } + + @Override + public void close() throws SailException { + // no-op + } + + @Override + public CloseableIteration getNamespaces() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public String getNamespace(String prefix) throws SailException { + return null; + } + + @Override + public CloseableIteration getContextIDs() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws SailException { + return new EmptyIteration<>(); + } + + @Override + public long size(Resource subj, IRI pred, Value obj, Resource... 
contexts) { + return size; + } + } + + @Test + public void size_respects_statementCleared() { + // backing dataset contains data (non-zero size) + SailDataset backing = new FixedSizeDataset(5); + + // create a changeset and simulate clear() without contexts + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + // not used in this test + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + + // clear() with zero contexts should mark statementCleared=true while leaving + // hasApproved()/hasDeprecated() false + changes.clear(); + + // snapshot over backing with pending clear should report size 0 + SailDataset snapshot = new SailDatasetImpl(backing, changes); + long snapshotSize = snapshot.size(null, null, null); + + assertEquals(0L, snapshotSize, + "size() should respect statementCleared and return 0 when cleared without contexts"); + } +} From a272f4462d7ba737003c84a1cf047dc9f82c074d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:36:28 +0200 Subject: [PATCH 11/18] fix --- .../rdf4j/sail/base/SailDatasetImpl.java | 4 +- .../sail/base/SailDatasetImplSizeTest.java | 238 ++++++++++++++++++ 2 files changed, 240 insertions(+), 2 deletions(-) diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java index 9c8cdf759d5..976cb5b94db 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java @@ -387,8 +387,8 @@ private boolean isDeprecated(Triple triple, List deprecatedStatements @Experimental @Override public long size(final Resource subj, final IRI pred, final Value obj, final Resource... 
contexts) { - // Fast path: no approved or deprecated - if (!changes.hasApproved() && !changes.hasDeprecated()) { + // Fast path: no approved or deprecated and not cleared + if (!changes.hasApproved() && !changes.hasDeprecated() && !changes.isStatementCleared()) { return derivedFrom.size(subj, pred, obj, contexts); } diff --git a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java index 619786bdb49..305bee4a313 100644 --- a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java +++ b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java @@ -11,15 +11,19 @@ package org.eclipse.rdf4j.sail.base; +import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.sail.SailException; import org.junit.jupiter.api.Test; @@ -100,4 +104,238 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { assertEquals(0L, snapshotSize, "size() should respect statementCleared and return 0 when cleared without contexts"); } + + /** + * Backing dataset that returns a concrete set of statements and supports filtering. 
+ */ + private static final class ListBackedDataset implements SailDataset { + private final java.util.List data; + + private ListBackedDataset(java.util.List data) { + this.data = java.util.List.copyOf(data); + } + + @Override + public void close() throws SailException { + } + + @Override + public CloseableIteration getNamespaces() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public String getNamespace(String prefix) throws SailException { + return null; + } + + @Override + public CloseableIteration getContextIDs() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws SailException { + java.util.stream.Stream stream = data.stream(); + if (subj != null) { + stream = stream.filter(st -> subj.equals(st.getSubject())); + } + if (pred != null) { + stream = stream.filter(st -> pred.equals(st.getPredicate())); + } + if (obj != null) { + stream = stream.filter(st -> obj.equals(st.getObject())); + } + if (contexts != null && contexts.length > 0) { + java.util.Set ctxs = new java.util.HashSet<>(java.util.Arrays.asList(contexts)); + stream = stream.filter(st -> ctxs.contains(st.getContext())); + } + java.util.Iterator it = stream.iterator(); + return new CloseableIteratorIteration<>(it); + } + } + + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final Resource CTX_A = VF.createIRI("urn:ctx:A"); + private static final Resource CTX_B = VF.createIRI("urn:ctx:B"); + private static final IRI P = VF.createIRI("urn:p"); + + private static Statement st(String s, String o, Resource ctx) { + return VF.createStatement(VF.createIRI("urn:s:" + s), P, VF.createIRI("urn:o:" + o), ctx); + } + + @Test + public void size_afterGlobalClear_countsApprovedOnly() { + SailDataset backing = new ListBackedDataset(java.util.List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B) + )); + + Changeset 
changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + + // global clear: remove all backing statements from view + changes.clear(); + // approve two new statements (one per context) + changes.approve(st("a", "a", CTX_A)); + changes.approve(st("b", "b", CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(2L, snapshot.size(null, null, null), + "after global clear, only approved statements are visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), + "context filter A should see 1 approved statement in A"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), + "context filter B should see 1 approved statement in B") + ); + } + + @Test + public void size_afterContextClear_excludesClearedContextData() { + // backing has 2 in A and 3 in B + SailDataset backing = new ListBackedDataset(java.util.List.of( + st("1", "1", CTX_A), st("2", "2", CTX_A), + st("3", "3", CTX_B), st("4", "4", CTX_B), st("5", "5", CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + + // clear only context A + changes.clear(CTX_A); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(3L, snapshot.size(null, null, null), + "global view should exclude cleared context A (only B remains)"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), + "cleared context A should be empty"), + () -> assertEquals(3L, snapshot.size(null, null, null, CTX_B), + "uncleared context B remains visible") + ); + } + + @Test + public void size_afterContextClear_withApprovedInClearedContext() 
{ + // backing has 1 in A and 1 in B + SailDataset backing = new ListBackedDataset(java.util.List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + + // clear A, then approve a new statement in A and another in B + changes.clear(CTX_A); + changes.approve(st("a", "a", CTX_A)); + changes.approve(st("b", "b", CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Global: backing B (1) + approved A (1) + approved B (1) = 3 + // Context A: only approved in A (1) + // Context B: backing B (1) + approved B (1) = 2 + assertAll( + () -> assertEquals(3L, snapshot.size(null, null, null), "global view reflects clear+approvals"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), "A has only approved statements"), + () -> assertEquals(2L, snapshot.size(null, null, null, CTX_B), "B has both backing and approved") + ); + } + + @Test + public void size_noChanges_delegatesToDerivedFrom() { + // With no approved/deprecated and not cleared, must delegate to backing.size + SailDataset backing = new FixedSizeDataset(7); + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + SailDataset snapshot = new SailDatasetImpl(backing, changes); + assertEquals(7L, snapshot.size(null, null, null)); + assertEquals(7L, snapshot.size(null, null, null, CTX_A)); + } + + @Test + public void size_withDeprecatedStatements_excludesDeprecatedOnes() { + Statement a1 = st("1", "1", CTX_A); + Statement b1 = st("2", "2", CTX_B); + SailDataset backing = new ListBackedDataset(java.util.List.of(a1, b1)); + + Changeset changes = new 
Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + // deprecate one existing statement + changes.deprecate(a1); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "one deprecated removed from global view"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "deprecated in A excluded"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B remains visible") + ); + } + + @Test + public void size_withApprovedDuplicates_doesNotDoubleCount() { + Statement b1 = st("2", "2", CTX_B); + SailDataset backing = new ListBackedDataset(java.util.List.of(b1)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public org.eclipse.rdf4j.model.Model createEmptyModel() { + return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + } + }; + // approve same statement as in backing + changes.approve(b1); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + assertEquals(1L, snapshot.size(null, null, null), "approved duplicate must not be double-counted"); + } } From 7aad32be4de5a51f5e5668d6bf638b9d30c6a4ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:36:56 +0200 Subject: [PATCH 12/18] fix --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2b3ab2c0b5d..7d6d944acf2 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ e2e/test-results .aider* /tools/server/.lwjgl/ /tools/server/.lwjgl/ +.m2_repo/ From bdbf88f134c39171545cd253accffdc8557ecb5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:37:51 +0200 Subject: [PATCH 13/18] wip --- AGENTS_temp.md | 77 
+++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/AGENTS_temp.md b/AGENTS_temp.md index af461b6082d..a8e912ba7aa 100644 --- a/AGENTS_temp.md +++ b/AGENTS_temp.md @@ -129,7 +129,7 @@ After each grouped action, post an **Evidence block**, then continue working: **Evidence template** ``` Evidence: -Command: mvn -o -pl -Dtest=Class#method verify +Command: mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=Class#method verify Report: /target/surefire-reports/.txt Snippet: \ @@ -153,7 +153,7 @@ To avoid losing the first test evidence when later runs overwrite `target/*-repo - Capture and store the last 200 lines of the Maven verify output. - Example (module‑scoped): - - `mvn -o -pl verify | tee .initial-verify.log` + - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tee .initial-verify.log` - `tail -200 .initial-verify.log > initial-evidence.txt` • On any failing verify run (unit or IT failures): @@ -195,6 +195,7 @@ Plan * **JDK:** 11 (minimum). The project builds and runs on Java 11+. * **Maven default:** run **offline** using `-o` whenever possible. +* **Maven local repo (required):** always pass `-Dmaven.repo.local=.m2_repo` on all Maven commands (install, verify, plugins, formatting). All examples in this document implicitly assume this flag, even if omitted. * **Network:** only to fetch missing deps/plugins; then rerun once without `-o`, and return offline. * **Large project:** some module test suites can take **5–10 minutes**. Prefer **targeted** runs. @@ -203,14 +204,14 @@ Plan `-am` is helpful for **compiles**, hazardous for **tests**. * ✅ Use `-am` **only** for compile/verify with tests skipped (e.g. `-Pquick`): - * `mvn -o -pl -am -Pquick install` + * `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` * ❌ Do **not** use `-am` with `verify` when tests are enabled. **Two-step pattern (fast + safe)** 1. 
**Compile deps fast (skip tests):** - `mvn -o -pl -am -Pquick install` + `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` 2. **Run tests:** - `mvn -o -pl verify | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` It is illegal to `-am` when running tests! It is illegal to `-q` when running tests! @@ -219,22 +220,22 @@ It is illegal to `-q` when running tests! ## Always Install Before Tests (Required) -The Maven reactor resolves inter-module dependencies from the local Maven repository (`~/.m2/repository`). +The Maven reactor resolves inter-module dependencies from the configured local Maven repository (here: `.m2_repo`). Running `install` publishes your changed modules there so downstream modules and tests pick up the correct versions. -* Always run `mvn -o -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a small timeout than 30,000 ms. -* Always run `mvn -o -Pquick install | tail -200` before any `verify` or test runs. +* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a timeout smaller than 30,000 ms. +* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before any `verify` or test runs. * If offline resolution fails due to a missing dependency or plugin, rerun the exact `install` command once without `-o`, then return offline. * Skipping this step can lead to stale or missing artifacts during tests, producing confusing compilation or linkage errors. -* Never ever change the repo location. Never use `-Dmaven.repo.local=.m2_repo`. +* Always use a workspace-local Maven repository: append `-Dmaven.repo.local=.m2_repo` to all Maven commands (install, verify, formatter, etc.). * Always try to run these commands first to see if they run without needing any approvals from the user w.r.t. the sandboxing.
Why this is mandatory -- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the local `~/.m2/repository` instead. -- Therefore, tests only see whatever versions were last published to `~/.m2`. If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `~/.m2` first. -- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to `~/.m2` before running any tests: run `mvn -o -Pquick install` at the repository root. -- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Pquick install` so the latest jars are available to the reactor from `~/.m2`. +- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the configured local repository (here: `.m2_repo`). +- Therefore, tests only see whatever versions were last published to the configured local repo (`.m2_repo`). If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `.m2_repo` first. +- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to the configured local repo (`.m2_repo`) before running any tests: run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` at the repository root. 
+- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` so the latest jars are available to the reactor from `.m2_repo`. --- ## Quick Start (First 10 Minutes) @@ -243,13 +244,13 @@ Why this is mandatory * Inspect root `pom.xml` and module tree (see “Maven Module Overview”). * Search fast with ripgrep: `rg -n ""` 2. **Build sanity (fast, skip tests)** - * `mvn -o -Pquick install | tail -200` + * `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` 3. **Format (Java, imports, XML)** - * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` + * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` 4. **Targeted tests (tight loops)** - * Module: `mvn -o -pl verify | tail -500` - * Class: `mvn -o -pl -Dtest=ClassName verify | tail -500` - * Method: `mvn -o -pl -Dtest=ClassName#method verify | tail -500` + * Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` + * Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName verify | tail -500` + * Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName#method verify | tail -500` 5. **Inspect failures** * **Unit (Surefire):** `/target/surefire-reports/` * **IT (Failsafe):** `/target/failsafe-reports/` @@ -343,8 +344,8 @@ It is illegal to `-q` when running tests! * **Plan:** small, verifiable steps; keep one `in_progress`. * **Change:** minimal, surgical edits; keep style/structure consistent. 
-* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast):** `mvn -o -pl -am -Pquick install | tail -500` +* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast):** `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install | tail -500` * **Test:** start smallest (class/method → module). For integration, run module `verify`. * **Triage:** read reports; fix root cause; expand scope only when needed. * **Iterate:** keep momentum; escalate only when blocked or irreversible. @@ -371,7 +372,7 @@ It is illegal to `-q` when running tests! ### Optional: Redirect test stdout/stderr to files ```bash -mvn -o -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 +mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 ```` Logs under: @@ -415,7 +416,7 @@ Assertions are executable claims about what must be true. Use **temporary tripwi * Always run before finalizing: - * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` + * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` * Style: no wildcard imports; 120‑char width; curly braces always; LF endings. --- @@ -443,9 +444,9 @@ Do **not** modify existing headers’ years. 
## Pre‑Commit Checklist -* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast path):** `mvn -o -Pquick install | tail -200` -* **Tests (targeted):** `mvn -o -pl verify | tail -500` (broaden as needed) +* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast path):** `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` +* **Tests (targeted):** `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` (broaden as needed) * **Reports:** zero new failures in Surefire/Failsafe, or explain precisely. * **Evidence:** Routine A — failing pre‑fix + passing post‑fix. Routine B — **pre/post green** from same selection + **Hit Proof**. @@ -515,19 +516,19 @@ Do **not** modify existing headers’ years. ## Running Tests -* By module: `mvn -o -pl core/sail/shacl verify | tail -500` -* Entire repo: `mvn -o verify` (long; only when appropriate) +* By module: `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl verify | tail -500` +* Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (long; only when appropriate) * Slow tests (entire repo): - `mvn -o verify -PslowTestsOnly,-skipSlowTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo verify -PslowTestsOnly,-skipSlowTests | tail -500` * Slow tests (by module): - `mvn -o -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` * Slow tests (specific test): - * `mvn -o -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` + * `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` * Integration tests (entire repo): - `mvn -o verify -PskipUnitTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo verify -PskipUnitTests | tail -500` * Integration tests (by module): - `mvn -o -pl verify 
-PskipUnitTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PskipUnitTests | tail -500` * Useful flags: * `-Dtest=ClassName` @@ -540,10 +541,10 @@ Do **not** modify existing headers’ years. ## Build * **Build without tests (fast path):** - `mvn -o -Pquick install` + `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` * **Verify with tests:** - Targeted module(s): `mvn -o -pl verify` - Entire repo: `mvn -o verify` (use judiciously) + Targeted module(s): `mvn -o -Dmaven.repo.local=.m2_repo -pl verify` + Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (use judiciously) * **When offline fails due to missing deps:** Re‑run the **exact** command **without** `-o` once to fetch, then return to `-o`. @@ -554,9 +555,9 @@ Do **not** modify existing headers’ years. JaCoCo is configured via the `jacoco` Maven profile in the root POM. Surefire/Failsafe honor the prepared agent `argLine`, so no extra flags are required beyond `-Pjacoco`. - Run with coverage - - Module: `mvn -o -pl -Pjacoco verify | tail -500` - - Class: `mvn -o -pl -Pjacoco -Dtest=ClassName verify | tail -500` - - Method: `mvn -o -pl -Pjacoco -Dtest=ClassName#method verify | tail -500` + - Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco verify | tail -500` + - Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco -Dtest=ClassName verify | tail -500` + - Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco -Dtest=ClassName#method verify | tail -500` - Where to find reports (per module) - Exec data: `/target/jacoco.exec` From 590bbeff3021ff36309aa8f99414b94c28d0ca88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:42:32 +0200 Subject: [PATCH 14/18] wip --- AGENTS_temp.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/AGENTS_temp.md b/AGENTS_temp.md index a8e912ba7aa..43824dfe312 100644 --- a/AGENTS_temp.md +++ b/AGENTS_temp.md @@ -423,7 +423,9 @@ Assertions are executable claims about what must be true. 
Use **temporary tripwi ## Source File Headers -Use this exact header for **new Java files only** (replace `${year}` with current year): +Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below. The text, spacing, punctuation, URL, and SPDX line must be identical. Replace `${year}` with the correct current year at the time the file is created. + +Hint: get the current year with `date +%Y`. ``` /******************************************************************************* From a18c6025ea9f517d943b5fcfbe8ca09fb86cc5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:42:54 +0200 Subject: [PATCH 15/18] wip --- AGENTS.md | 81 +++--- AGENTS_temp.md | 723 ------------------------------------------------- 2 files changed, 42 insertions(+), 762 deletions(-) delete mode 100644 AGENTS_temp.md diff --git a/AGENTS.md b/AGENTS.md index af461b6082d..43824dfe312 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -129,7 +129,7 @@ After each grouped action, post an **Evidence block**, then continue working: **Evidence template** ``` Evidence: -Command: mvn -o -pl -Dtest=Class#method verify +Command: mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=Class#method verify Report: /target/surefire-reports/.txt Snippet: \ @@ -153,7 +153,7 @@ To avoid losing the first test evidence when later runs overwrite `target/*-repo - Capture and store the last 200 lines of the Maven verify output. - Example (module‑scoped): - - `mvn -o -pl verify | tee .initial-verify.log` + - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tee .initial-verify.log` - `tail -200 .initial-verify.log > initial-evidence.txt` • On any failing verify run (unit or IT failures): @@ -195,6 +195,7 @@ Plan * **JDK:** 11 (minimum). The project builds and runs on Java 11+. * **Maven default:** run **offline** using `-o` whenever possible. 
+* **Maven local repo (required):** always pass `-Dmaven.repo.local=.m2_repo` on all Maven commands (install, verify, plugins, formatting). All examples in this document implicitly assume this flag, even if omitted. * **Network:** only to fetch missing deps/plugins; then rerun once without `-o`, and return offline. * **Large project:** some module test suites can take **5–10 minutes**. Prefer **targeted** runs. @@ -203,14 +204,14 @@ Plan `-am` is helpful for **compiles**, hazardous for **tests**. * ✅ Use `-am` **only** for compile/verify with tests skipped (e.g. `-Pquick`): - * `mvn -o -pl -am -Pquick install` + * `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` * ❌ Do **not** use `-am` with `verify` when tests are enabled. **Two-step pattern (fast + safe)** 1. **Compile deps fast (skip tests):** - `mvn -o -pl -am -Pquick install` + `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` 2. **Run tests:** - `mvn -o -pl verify | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` It is illegal to `-am` when running tests! It is illegal to `-q` when running tests! @@ -219,22 +220,22 @@ It is illegal to `-q` when running tests! ## Always Install Before Tests (Required) -The Maven reactor resolves inter-module dependencies from the local Maven repository (`~/.m2/repository`). +The Maven reactor resolves inter-module dependencies from the configured local Maven repository (here: `.m2_repo`). Running `install` publishes your changed modules there so downstream modules and tests pick up the correct versions. -* Always run `mvn -o -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a small timeout than 30,000 ms. -* Always run `mvn -o -Pquick install | tail -200` before any `verify` or test runs. +* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. 
Never use a timeout smaller than 30,000 ms. +* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before any `verify` or test runs. * If offline resolution fails due to a missing dependency or plugin, rerun the exact `install` command once without `-o`, then return offline. * Skipping this step can lead to stale or missing artifacts during tests, producing confusing compilation or linkage errors. -* Never ever change the repo location. Never use `-Dmaven.repo.local=.m2_repo`. +* Always use a workspace-local Maven repository: append `-Dmaven.repo.local=.m2_repo` to all Maven commands (install, verify, formatter, etc.). * Always try to run these commands first to see if they run without needing any approvals from the user w.r.t. the sandboxing. Why this is mandatory -- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the local `~/.m2/repository` instead. -- Therefore, tests only see whatever versions were last published to `~/.m2`. If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `~/.m2` first. -- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to `~/.m2` before running any tests: run `mvn -o -Pquick install` at the repository root. -- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Pquick install` so the latest jars are available to the reactor from `~/.m2`. +- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the configured local repository (here: `.m2_repo`).
+- Therefore, tests only see whatever versions were last published to the configured local repo (`.m2_repo`). If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `.m2_repo` first. +- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to the configured local repo (`.m2_repo`) before running any tests: run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` at the repository root. +- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` so the latest jars are available to the reactor from `.m2_repo`. --- ## Quick Start (First 10 Minutes) @@ -243,13 +244,13 @@ Why this is mandatory * Inspect root `pom.xml` and module tree (see “Maven Module Overview”). * Search fast with ripgrep: `rg -n ""` 2. **Build sanity (fast, skip tests)** - * `mvn -o -Pquick install | tail -200` + * `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` 3. **Format (Java, imports, XML)** - * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` + * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` 4. **Targeted tests (tight loops)** - * Module: `mvn -o -pl verify | tail -500` - * Class: `mvn -o -pl -Dtest=ClassName verify | tail -500` - * Method: `mvn -o -pl -Dtest=ClassName#method verify | tail -500` + * Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` + * Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName verify | tail -500` + * Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName#method verify | tail -500` 5. 
**Inspect failures** * **Unit (Surefire):** `/target/surefire-reports/` * **IT (Failsafe):** `/target/failsafe-reports/` @@ -343,8 +344,8 @@ It is illegal to `-q` when running tests! * **Plan:** small, verifiable steps; keep one `in_progress`. * **Change:** minimal, surgical edits; keep style/structure consistent. -* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast):** `mvn -o -pl -am -Pquick install | tail -500` +* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast):** `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install | tail -500` * **Test:** start smallest (class/method → module). For integration, run module `verify`. * **Triage:** read reports; fix root cause; expand scope only when needed. * **Iterate:** keep momentum; escalate only when blocked or irreversible. @@ -371,7 +372,7 @@ It is illegal to `-q` when running tests! ### Optional: Redirect test stdout/stderr to files ```bash -mvn -o -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 +mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 ```` Logs under: @@ -415,14 +416,16 @@ Assertions are executable claims about what must be true. Use **temporary tripwi * Always run before finalizing: - * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` + * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` * Style: no wildcard imports; 120‑char width; curly braces always; LF endings. --- ## Source File Headers -Use this exact header for **new Java files only** (replace `${year}` with current year): +Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below. The text, spacing, punctuation, URL, and SPDX line must be identical. 
Replace `${year}` with the correct current year at the time the file is created. + +Hint: get the current year with `date +%Y`. ``` /******************************************************************************* @@ -443,9 +446,9 @@ Do **not** modify existing headers’ years. ## Pre‑Commit Checklist -* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast path):** `mvn -o -Pquick install | tail -200` -* **Tests (targeted):** `mvn -o -pl verify | tail -500` (broaden as needed) +* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` +* **Compile (fast path):** `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` +* **Tests (targeted):** `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` (broaden as needed) * **Reports:** zero new failures in Surefire/Failsafe, or explain precisely. * **Evidence:** Routine A — failing pre‑fix + passing post‑fix. Routine B — **pre/post green** from same selection + **Hit Proof**. @@ -515,19 +518,19 @@ Do **not** modify existing headers’ years. 
## Running Tests -* By module: `mvn -o -pl core/sail/shacl verify | tail -500` -* Entire repo: `mvn -o verify` (long; only when appropriate) +* By module: `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl verify | tail -500` +* Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (long; only when appropriate) * Slow tests (entire repo): - `mvn -o verify -PslowTestsOnly,-skipSlowTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo verify -PslowTestsOnly,-skipSlowTests | tail -500` * Slow tests (by module): - `mvn -o -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` * Slow tests (specific test): - * `mvn -o -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` + * `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` * Integration tests (entire repo): - `mvn -o verify -PskipUnitTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo verify -PskipUnitTests | tail -500` * Integration tests (by module): - `mvn -o -pl verify -PskipUnitTests | tail -500` + `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PskipUnitTests | tail -500` * Useful flags: * `-Dtest=ClassName` @@ -540,10 +543,10 @@ Do **not** modify existing headers’ years. ## Build * **Build without tests (fast path):** - `mvn -o -Pquick install` + `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` * **Verify with tests:** - Targeted module(s): `mvn -o -pl verify` - Entire repo: `mvn -o verify` (use judiciously) + Targeted module(s): `mvn -o -Dmaven.repo.local=.m2_repo -pl verify` + Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (use judiciously) * **When offline fails due to missing deps:** Re‑run the **exact** command **without** `-o` once to fetch, then return to `-o`. @@ -554,9 +557,9 @@ Do **not** modify existing headers’ years. 
JaCoCo is configured via the `jacoco` Maven profile in the root POM. Surefire/Failsafe honor the prepared agent `argLine`, so no extra flags are required beyond `-Pjacoco`. - Run with coverage - - Module: `mvn -o -pl <module> -Pjacoco verify | tail -500` - - Class: `mvn -o -pl <module> -Pjacoco -Dtest=ClassName verify | tail -500` - - Method: `mvn -o -pl <module> -Pjacoco -Dtest=ClassName#method verify | tail -500` + - Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco verify | tail -500` + - Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco -Dtest=ClassName verify | tail -500` + - Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco -Dtest=ClassName#method verify | tail -500` - Where to find reports (per module) - Exec data: `<module>/target/jacoco.exec` diff --git a/AGENTS_temp.md b/AGENTS_temp.md deleted file mode 100644 index 43824dfe312..00000000000 --- a/AGENTS_temp.md +++ /dev/null @@ -1,723 +0,0 @@ -# AGENTS.md - -Welcome, AI Agent! Your persistence, curiosity, and craftsmanship make a difference. Take your time, work methodically, validate thoroughly, and iterate. This repository is large and tests can take time — that’s expected and supported. - -You need to read the entire AGENTS.md file and follow all instructions exactly. Keep this fresh in your context as you work. - -> **Timebox:** Aim to complete each autonomous run in **15–30 minutes**. - ---- - -## Read‑Me‑Now: Proportional Test‑First Rule (Default) - -**Default:** Use **test‑first (TDD)** for any change that alters externally observable behavior. - -**Proportional exceptions:** You may **skip writing a new failing test** *only* when **all** Routine B gates (below) pass, or when using Routine C (Spike/Investigate) with **no production code changes**.
- -**You may not touch production code for behavior‑changing work until a smallest‑scope failing automated test exists inside this repo and you have captured its report snippet.** A user‑provided stack trace or “obvious” contract violation is **not** a substitute for an in‑repo failing test. - -**Auto‑stop:** If you realize you patched production before creating/observing the failing test for behavior‑changing work, **stop**, revert the patch, and resume from “Reproduce first”. - -**Traceability trio (must appear in your handoff):** -1. **Description** (what you’re about to do) -2. **Evidence** (Surefire/Failsafe snippet from this repo) -3. **Plan** (one and only one `in_progress` step) - -It is illegal to `-am` when running tests! -It is illegal to `-q` when running tests! - -> **Clarification:** For **strictly behavior‑neutral refactors** that are already **fully exercised by existing tests**, or for **bugfixes with an existing failing test**, you may use **Routine B — Change without new tests**. In that case you must capture **pre‑change passing evidence** at the smallest scope that hits the code you’re about to edit, prove **Hit Proof**, then show **post‑change passing evidence** from the **same selection**. -> **No exceptions for any behavior‑changing change** — for those, you must follow **Routine A — Full TDD**. - ---- - -## Three Routines: Choose Your Path - -**Routine A — Full TDD (Default)** -**Routine B — Change without new tests (Proportional, gated)** -**Routine C — Spike/Investigate (No production changes)** - -### Decision quickstart - -1. **Is new externally observable behavior required?** - → **Yes:** **Routine A (Full TDD)**. Add the smallest failing test first. - → **No:** continue. - -2. **Does a failing test already exist in this repo that pinpoints the issue?** - → **Yes:** **Routine B (Bugfix using existing failing test).** - → **No:** continue. - -3.
**Is the edit strictly behavior‑neutral, local in scope, and clearly hit by existing tests?** - → **Yes:** **Routine B (Refactor/micro‑perf/documentation/build).** - → **No or unsure:** continue. - -4. **Is this purely an investigation/design spike with no production code changes?** - → **Yes:** **Routine C (Spike/Investigate).** - → **No or unsure:** **Routine A.** - -**When in doubt, choose Routine A (Full TDD).** Ambiguity is risk; tests are insurance. - ---- - -## Proportionality Model (Think before you test) - -Score the change on these lenses. If any are **High**, prefer **Routine A**. - -- **Behavioral surface:** affects outputs, serialization, parsing, APIs, error text, timing/order? -- **Blast radius:** number of modules/classes touched; public vs internal. -- **Reversibility:** quick revert vs migration/data change. -- **Observability:** can existing tests or assertions expose regressions? -- **Coverage depth:** do existing tests directly hit the edited code? -- **Concurrency / IO / Time:** any risk here is **High** by default. - ---- - -## Purpose & Contract - -* **Bold goal:** deliver correct, minimal, well‑tested changes with clear handoff. Fix root causes; avoid hacks. -* **Bias to action:** when inputs are ambiguous, choose a reasonable path, state assumptions, and proceed. -* **Ask only when blocked or irreversible:** permissions, missing deps, conflicting requirements, destructive repo‑wide changes. -* **Definition of Done** - * Code formatted and imports sorted. - * Compiles with a quick profile / targeted modules. - * Relevant module tests pass; failures triaged or crisply explained. - * Only necessary files changed; headers correct for new files. - * Clear final summary: what changed, why, where, how verified, next steps. - * **Evidence present:** failing test output (pre‑fix) and passing output (post‑fix) are shown for Routine A; for Routine B show **pre/post green** from the **same selection** plus **Hit Proof**. 
- -### No Monkey‑Patching or Band‑Aid Fixes (Non‑Negotiable) - -Durable, root‑cause fixes only. No muting tests, no broad catch‑and‑ignore, no widening APIs “to make green”. - -**Strictly avoid** -* Sleeping/timeouts to hide flakiness. -* Swallowing exceptions or weakening assertions. -* Reflection/internal state manipulation to bypass interfaces. -* Feature flags that disable validation instead of fixing logic. -* Changing public APIs/configs without necessity tied to root cause. - -**Preferred approach** -* Reproduce the issue and isolate the smallest failing test (class → method). -* Trace to the true source; fix in the right module. -* Add focused tests for behavior/edge cases (Routine A) or prove coverage/neutrality (Routine B). -* Run tight, targeted verifies; broaden only if needed. - ---- - -## Enforcement & Auto‑Fail Triggers - -Your run is **invalid** and must be restarted from “Reproduce first” if any occur: - -* You modify production code before adding and running the smallest failing test in this repo **for behavior‑changing work**. -* You proceed without pasting a Surefire/Failsafe report snippet from `target/*-reports/`. -* Your plan does not have **exactly one** `in_progress` step. -* You run tests using `-am` or `-q`. -* You treat a narrative failure description or external stack trace as equivalent to an in‑repo failing test. -* **Routine B specific:** you cannot demonstrate that existing tests exercise the edited code (**Hit Proof**), or you fail to capture both pre‑ and post‑change **matching** passing snippets from the same selection. -* **Routine C breach:** you change production code while in a spike. - -**Recovery procedure:** -Update the plan (`in_progress: create failing test`), post a description of your next step, create the failing test, run it, capture the report snippet, then resume. -For Routine B refactors: if any gate fails, **switch to Full TDD** and add the smallest failing test. 
- ---- - -## Evidence Protocol (Mandatory) - -After each grouped action, post an **Evidence block**, then continue working: - -**Evidence template** -``` -Evidence: -Command: mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=Class#method verify -Report: /target/surefire-reports/.txt -Snippet: -\ -``` - -**Routine B additions** -* **Pre‑green:** capture a pre‑change **passing** snippet from the **most specific** test selection that hits your code (ideally a class or method). -* **Hit Proof (choose one):** - * An existing test class/method that directly calls the edited class/method, plus a short `rg -n` snippet showing the call site; **or** - * A Surefire/Failsafe output line containing the edited class/method names; **or** - * A temporary assertion or deliberate, isolated failing check in a **scratch test** proving the path is executed (then remove). -* **Post‑green:** after the patch, re‑run the **same selection** and capture a passing snippet. - ---- - -### Initial Evidence Capture (Required) - -To avoid losing the first test evidence when later runs overwrite `target/*-reports/`, immediately persist the initial verify results to a top‑level `initial-evidence.txt` file. - -• On a fully green verify run: - -- Capture and store the last 200 lines of the Maven verify output. -- Example (module‑scoped): - - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tee .initial-verify.log` - - `tail -200 .initial-verify.log > initial-evidence.txt` - -• On any failing verify run (unit or IT failures): - -- Concatenate the Surefire and/or Failsafe report text files into `initial-evidence.txt`. -- Example (repo‑root): - - `find . -type f \( -path "*/target/surefire-reports/*.txt" -o -path "*/target/failsafe-reports/*.txt" \) -print0 | xargs -0 cat > initial-evidence.txt` - -Notes - -- Keep `initial-evidence.txt` at the repository root alongside your final handoff. -- Do not rely on `target/*-reports/` for the final report; they may be overwritten by subsequent runs. 
-- Continue to include the standard Evidence block(s) in your messages as usual. - ---- - -## Living Plan Protocol (Sharper) - -Maintain a **living plan** with checklist items (5–7 words each). Keep **exactly one** `in_progress`. - -**Plan format** -``` - -Plan - -* \[done] sanity build quick profile -* \[in\_progress] add smallest failing test -* \[todo] minimal root-cause fix -* \[todo] rerun focused then module tests -* \[todo] format, verify, summary - -```` - -**Rule:** If you deviate, update the plan **first**, then proceed. - ---- - -## Environment - -* **JDK:** 11 (minimum). The project builds and runs on Java 11+. -* **Maven default:** run **offline** using `-o` whenever possible. -* **Maven local repo (required):** always pass `-Dmaven.repo.local=.m2_repo` on all Maven commands (install, verify, plugins, formatting). All examples in this document implicitly assume this flag, even if omitted. -* **Network:** only to fetch missing deps/plugins; then rerun once without `-o`, and return offline. -* **Large project:** some module test suites can take **5–10 minutes**. Prefer **targeted** runs. - -### Maven `-am` usage (house rule) - -`-am` is helpful for **compiles**, hazardous for **tests**. - -* ✅ Use `-am` **only** for compile/verify with tests skipped (e.g. `-Pquick`): - * `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` -* ❌ Do **not** use `-am` with `verify` when tests are enabled. - -**Two-step pattern (fast + safe)** -1. **Compile deps fast (skip tests):** - `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install` -2. **Run tests:** - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` - -It is illegal to `-am` when running tests! -It is illegal to `-q` when running tests! - ---- - -## Always Install Before Tests (Required) - -The Maven reactor resolves inter-module dependencies from the configured local Maven repository (here: `.m2_repo`). 
-Running `install` publishes your changed modules there so downstream modules and tests pick up the correct versions. - -* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a timeout smaller than 30,000 ms. -* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before any `verify` or test runs. -* If offline resolution fails due to a missing dependency or plugin, rerun the exact `install` command once without `-o`, then return offline. -* Skipping this step can lead to stale or missing artifacts during tests, producing confusing compilation or linkage errors. -* Always use a workspace-local Maven repository: append `-Dmaven.repo.local=.m2_repo` to all Maven commands (install, verify, formatter, etc.). -* Always try to run these commands first to see if they run without needing any approvals from the user w.r.t. the sandboxing. - -Why this is mandatory - -- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the configured local repository (here: `.m2_repo`). -- Therefore, tests only see whatever versions were last published to the configured local repo (`.m2_repo`). If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `.m2_repo` first. -- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to the configured local repo (`.m2_repo`) before running any tests: run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` at the repository root.
-- In tight loops you may also install a specific module and its deps (`-pl -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` so the latest jars are available to the reactor from `.m2_repo`. ---- - -## Quick Start (First 10 Minutes) - -1. **Discover** - * Inspect root `pom.xml` and module tree (see “Maven Module Overview”). - * Search fast with ripgrep: `rg -n ""` -2. **Build sanity (fast, skip tests)** - * `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` -3. **Format (Java, imports, XML)** - * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` -4. **Targeted tests (tight loops)** - * Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` - * Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName verify | tail -500` - * Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName#method verify | tail -500` -5. **Inspect failures** - * **Unit (Surefire):** `/target/surefire-reports/` - * **IT (Failsafe):** `/target/failsafe-reports/` - -It is illegal to `-am` when running tests! -It is illegal to `-q` when running tests! - ---- - -## Routine A — Full TDD (Default) - -> Use for **all behavior‑changing work** and whenever Routine B gates do not all pass. - -### Bugfix Workflow (Mandatory) - -* **Reproduce first:** write the smallest focused test (class/method) that reproduces the reported bug **inside this repo**. Confirm it fails. -* **Keep the test as‑is:** do not weaken assertions or mute the failure. -* **Fix at the root:** minimal, surgical change in the correct module. -* **Verify locally:** re‑run the focused test, then the module’s tests. Avoid `-am`/`-q` with tests. -* **Broaden if needed:** expand scope only after targeted greens. -* **Document clearly:** failing output (pre‑fix), root cause, minimal fix, passing output (post‑fix). 
- -### Hard Gates - -* A failing test exists at the smallest scope (method/class). -* **No production patch before the failing test is observed and recorded.** -* Test runs avoid `-am` and `-q`. - ---- - -## Routine B — Change without new tests (Proportional, gated) - -> Use **only** when at least one Allowed Case applies **and** all Routine B **Gates** pass. - -### Allowed cases (one or more) -1. **Bugfix with existing failing test** in this repo (pinpoints class/method). -2. **Strictly behavior‑neutral refactor / cleanup / micro‑perf** with clear existing coverage hitting the edited path. -3. **Migration/rename/autogen refresh** where behavior is already characterized by existing tests. -4. **Build/CI/docs/logging/message changes** that do not alter runtime behavior or asserted outputs. -5. **Data/resource tweaks** not asserted by tests and not affecting behavior. - -### Routine B Gates (all must pass) -- **Neutrality/Scope:** No externally observable behavior change. Localized edit. -- **Hit Proof:** Demonstrate tests exercise the edited code. -- **Pre/Post Green Match:** Same smallest‑scope selection, passing before and after. -- **Risk Check:** No concurrency/time/IO semantics touched; no public API, serialization, parsing, or ordering changes. -- **Reversibility:** Change is easy to revert if needed. - -**If any gate fails → switch to Routine A.** - ---- - -## Routine C — Spike / Investigate (No production changes) - -> Use for exploration, triage, design spikes, and measurement. **No production code edits.** - -**You may:** -- Add temporary scratch tests, assertions, scripts, or notes. -- Capture measurements, traces, logs. - -**Hand‑off must include:** -- Description, commands, and artifacts (logs/notes). -- Findings, options, and a proposed next routine (A or B). -- Removal of any temporary code if not adopted. 
- ---- - -## Where to Draw the Line — A Short Debate - -> **Purist:** “All changes must start with a failing test.” -> **Pragmatist:** “For refactors that can’t fail first without faking it, prove coverage and equality of behavior.” - -**In‑scope for Routine B (examples)** -* Rename private methods; extract helper; dead‑code removal. -* Replace straightforward loop with stream (same results, same ordering). -* Tighten generics/nullability/annotations without observable change. -* Micro‑perf cache within a method with deterministic inputs and strong coverage. -* Logging/message tweaks **not** asserted by tests. -* Build/CI config that doesn’t alter runtime behavior. - -**Out‑of‑scope (use Routine A)** -* Changing query results, serialization, or parsing behavior. -* Altering error messages that tests assert. -* Anything touching concurrency, timeouts, IO, or ordering. -* New SPARQL function support or extended syntax (even “tiny”). -* Public API changes or cross‑module migrations with unclear blast radius. - ---- - -## Working Loop - -* **Plan:** small, verifiable steps; keep one `in_progress`. -* **Change:** minimal, surgical edits; keep style/structure consistent. -* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast):** `mvn -o -Dmaven.repo.local=.m2_repo -pl -am -Pquick install | tail -500` -* **Test:** start smallest (class/method → module). For integration, run module `verify`. -* **Triage:** read reports; fix root cause; expand scope only when needed. -* **Iterate:** keep momentum; escalate only when blocked or irreversible. - -It is illegal to `-am` when running tests! -It is illegal to `-q` when running tests! - ---- - -## Testing Strategy - -* **Prefer module tests you touched:** `-pl ` -* **Narrow further** to a class/method; then broaden to the module. -* **Expand scope** when changes cross boundaries or neighbor modules fail. 
-* **Read reports** - * Surefire (unit): `target/surefire-reports/` - * Failsafe (IT): `target/failsafe-reports/` -* **Helpful flags** - * `-Dtest=Class#method` (unit selection) - * `-Dit.test=ITClass#method` (integration selection) - * `-DtrimStackTrace=false` (full traces) - * `-DskipITs` (focus on unit tests) - * `-DfailIfNoTests=false` (when selecting a class that has no tests on some platforms) - -### Optional: Redirect test stdout/stderr to files -```bash -mvn -o -Dmaven.repo.local=.m2_repo -pl -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500 -```` - -Logs under: - -``` -/target/surefire-reports/ClassName-output.txt -``` - -(Use similarly for Failsafe via `-Dit.test=`.) - ---- - -## Assertions: Make invariants explicit - -Assertions are executable claims about what must be true. Use **temporary tripwires** during investigation and **permanent contracts** once an invariant matters. - -* One fact per assert; fail fast and usefully. -* Include stable context in messages; avoid side effects. -* Keep asserts cheap; don’t replace user input validation with asserts. - -**Java specifics** - -* Enable VM assertions in tests (`-ea`). -* Use exceptions for runtime guarantees; `assert` for “cannot happen”. - -(Concrete examples omitted here for brevity; keep your current patterns.) - ---- - -## Triage Playbook - -* **Missing dep/plugin offline:** rerun the exact command once **without** `-o`, then return offline. -* **Compilation errors:** fix imports/generics/visibility; quick install in the module. -* **Flaky/slow tests:** run the specific failing test; stabilize root cause before broad runs. -* **Formatting failures:** run formatter/import/XML sort; re‑verify. -* **License header missing:** add for **new** files only; do not change years on existing files. 
- ---- - -## Code Formatting - -* Always run before finalizing: - - * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* Style: no wildcard imports; 120‑char width; curly braces always; LF endings. - ---- - -## Source File Headers - -Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below. The text, spacing, punctuation, URL, and SPDX line must be identical. Replace `${year}` with the correct current year at the time the file is created. - -Hint: get the current year with `date +%Y`. - -``` -/******************************************************************************* - * Copyright (c) ${year} Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -``` - -Do **not** modify existing headers’ years. - ---- - -## Pre‑Commit Checklist - -* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format` -* **Compile (fast path):** `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` -* **Tests (targeted):** `mvn -o -Dmaven.repo.local=.m2_repo -pl verify | tail -500` (broaden as needed) -* **Reports:** zero new failures in Surefire/Failsafe, or explain precisely. -* **Evidence:** Routine A — failing pre‑fix + passing post‑fix. - Routine B — **pre/post green** from same selection + **Hit Proof**. - ---- - -## Branching & Commit Conventions - -* Branch names: start with `GH-XXXX` (GitHub issue number). Optional short slug, e.g., `GH-1234-trig-writer-check`. -* Commit messages: `GH-XXXX ` on every commit. 
- ---- - -## Branch & PR Workflow (Agent) - -* Confirm issue number first (mandatory). -* Branch: `git checkout -b GH-XXXX-your-slug` -* Stage: `git add -A` (ensure new Java files have the required header). -* Optional: formatter + quick install. -* Commit: `git commit -m "GH-XXXX "` -* Push & PR: use the default template; fill all fields; include `Fixes #XXXX`. - ---- - -## Navigation & Search - -* Files: `rg --files` -* Content: `rg -n ""` -* Read big files in chunks: - - * `sed -n '1,200p' path/to/File.java` - * `sed -n '201,400p' path/to/File.java` - ---- - -## Autonomy Rules (Act > Ask) - -* **Default:** act with assumptions; document them. -* **Keep going:** chain steps; short progress updates before long actions. -* **Ask only when:** blocked by sandbox/approvals/network, or change is destructive/irreversible, or impacts public APIs/dependencies/licensing. -* **Prefer reversible moves:** smallest local change that unblocks progress; validate with targeted tests first. - -**Defaults** - -* **Tests:** start with `-pl `, then `-Dtest=Class#method` / `-Dit.test=ITClass#method`. -* **Build:** use `-o`; drop `-o` once only to fetch; return offline. -* **Formatting:** run formatter/import/XML before verify. -* **Reports:** read surefire/failsafe locally; expand scope only when necessary. - ---- - -## Answer Template (Use This) - -* **What changed:** summary of approach and rationale. -* **Files touched:** list file paths. -* **Commands run:** key build/test commands. -* **Verification:** which tests passed, where you checked reports. -* **Evidence:** - *Routine A:* failing output (pre‑fix) and passing output (post‑fix). - *Routine B:* pre‑ and post‑green snippets from the **same selection** + **Hit Proof**. - *Routine C:* artifacts from investigation (logs/notes/measurements) and proposed next steps. -* **Assumptions:** key assumptions and autonomous decisions. -* **Limitations:** anything left or risky edge cases. -* **Next steps:** optional follow‑ups. 
- ---- - -## Running Tests - -* By module: `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl verify | tail -500` -* Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (long; only when appropriate) -* Slow tests (entire repo): - `mvn -o -Dmaven.repo.local=.m2_repo verify -PslowTestsOnly,-skipSlowTests | tail -500` -* Slow tests (by module): - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PslowTestsOnly,-skipSlowTests | tail -500` -* Slow tests (specific test): - - * `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500` -* Integration tests (entire repo): - `mvn -o -Dmaven.repo.local=.m2_repo verify -PskipUnitTests | tail -500` -* Integration tests (by module): - `mvn -o -Dmaven.repo.local=.m2_repo -pl verify -PskipUnitTests | tail -500` -* Useful flags: - - * `-Dtest=ClassName` - * `-Dtest=ClassName#method` - * `-Dit.test=ITClass#method` - * `-DtrimStackTrace=false` - ---- - -## Build - -* **Build without tests (fast path):** - `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` -* **Verify with tests:** - Targeted module(s): `mvn -o -Dmaven.repo.local=.m2_repo -pl verify` - Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (use judiciously) -* **When offline fails due to missing deps:** - Re‑run the **exact** command **without** `-o` once to fetch, then return to `-o`. - ---- - -## Using JaCoCo (Coverage) - -JaCoCo is configured via the `jacoco` Maven profile in the root POM. Surefire/Failsafe honor the prepared agent `argLine`, so no extra flags are required beyond `-Pjacoco`. 
- -- Run with coverage - - Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco verify | tail -500` - - Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco -Dtest=ClassName verify | tail -500` - - Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl -Pjacoco -Dtest=ClassName#method verify | tail -500` - -- Where to find reports (per module) - - Exec data: `/target/jacoco.exec` - - HTML report: `/target/site/jacoco/index.html` - - XML report: `/target/site/jacoco/jacoco.xml` - -- Check if a specific test covers code X - - Run only that test (class or method) with `-Dtest=...` (see above) and `-Pjacoco`. - - Open the HTML report and navigate to the class/method of interest; non-zero line/branch coverage indicates the selected test touched it. - - For multiple tests, run them in small subsets to localize coverage quickly. - -- Troubleshooting - - If you see “Skipping JaCoCo execution due to missing execution data file”, ensure you passed `-Pjacoco` and ran the install step first. - - If offline resolution fails for the JaCoCo plugin, rerun the exact command once without `-o`, then return offline. - -- Notes - - The default JaCoCo reports do not list “which individual tests” hit each line. Use single-test runs to infer per-test coverage. If you need true per-test mapping, add a JUnit 5 extension that sets a JaCoCo session per test and writes per-test exec files. - - Do not use `-am` when running tests; keep runs targeted by module/class/method. - ---- - -## Prohibited Misinterpretations - -* A user stack trace, reproduction script, or verbal description **is not evidence** for behavior‑changing work. You must implement the smallest failing test **inside this repo**. -* For Routine B, a stack trace is neither required nor sufficient; **Hit Proof** plus **pre/post green** snippets are mandatory. -* Routine C must not change production code. - ---- - -## Maven Module Overview - -The project is organised as a multi-module Maven build. 
The diagram below lists -all modules and submodules with a short description for each. - -``` -rdf4j: root project -├── assembly-descriptors: RDF4J: Assembly Descriptors -├── core: Core modules for RDF4J - ├── common: RDF4J common: shared classes - │ ├── annotation: RDF4J common annotation classes - │ ├── exception: RDF4J common exception classes - │ ├── io: RDF4J common IO classes - │ ├── iterator: RDF4J common iterators - │ ├── order: Order of vars and statements - │ ├── text: RDF4J common text classes - │ ├── transaction: RDF4J common transaction classes - │ └── xml: RDF4J common XML classes - ├── model-api: RDF model interfaces. - ├── model-vocabulary: Well-Known RDF vocabularies. - ├── model: RDF model implementations. - ├── sparqlbuilder: A fluent SPARQL query builder - ├── rio: Rio (RDF I/O) is an API for parsers and writers of various RDF file formats. - │ ├── api: Rio API. - │ ├── languages: Rio Language handler implementations. - │ ├── datatypes: Rio Datatype handler implementations. - │ ├── binary: Rio parser and writer implementation for the binary RDF file format. - │ ├── hdt: Experimental Rio parser and writer implementation for the HDT file format. - │ ├── jsonld-legacy: Rio parser and writer implementation for the JSON-LD file format. - │ ├── jsonld: Rio parser and writer implementation for the JSON-LD file format. - │ ├── n3: Rio writer implementation for the N3 file format. - │ ├── nquads: Rio parser and writer implementation for the N-Quads file format. - │ ├── ntriples: Rio parser and writer implementation for the N-Triples file format. - │ ├── rdfjson: Rio parser and writer implementation for the RDF/JSON file format. - │ ├── rdfxml: Rio parser and writer implementation for the RDF/XML file format. - │ ├── trix: Rio parser and writer implementation for the TriX file format. - │ ├── turtle: Rio parser and writer implementation for the Turtle file format. - │ └── trig: Rio parser and writer implementation for the TriG file format. 
- ├── queryresultio: Query result IO API and implementations. - │ ├── api: Query result IO API - │ ├── binary: Query result parser and writer implementation for RDF4J's binary query results format. - │ ├── sparqljson: Query result writer implementation for the SPARQL Query Results JSON Format. - │ ├── sparqlxml: Query result parser and writer implementation for the SPARQL Query Results XML Format. - │ └── text: Query result parser and writer implementation for RDF4J's plain text boolean query results format. - ├── query: Query interfaces and implementations - ├── queryalgebra: Query algebra model and evaluation. - │ ├── model: A generic query algebra for RDF queries. - │ ├── evaluation: Evaluation strategy API and implementations for the query algebra model. - │ └── geosparql: Query algebra implementations to support the evaluation of GeoSPARQL. - ├── queryparser: Query parser API and implementations. - │ ├── api: Query language parsers API. - │ └── sparql: Query language parser implementation for SPARQL. - ├── http: Client and protocol for repository communication over HTTP. - │ ├── protocol: HTTP protocol (REST-style) - │ └── client: Client functionality for communicating with an RDF4J server over HTTP. - ├── queryrender: Query Render and Builder tools - ├── repository: Repository API and implementations. - │ ├── api: API for interacting with repositories of RDF data. - │ ├── manager: Repository manager - │ ├── sail: Repository that uses a Sail stack. - │ ├── dataset: Implementation that loads all referenced datasets into a wrapped repository - │ ├── event: Implementation that notifies listeners of events on a wrapped repository - │ ├── http: "Virtual" repository that communicates with a (remote) repository over the HTTP protocol. - │ ├── contextaware: Implementation that allows default values to be set on a wrapped repository - │ └── sparql: The SPARQL Repository provides a RDF4J Repository interface to any SPARQL end-point. 
- ├── sail: Sail API and implementations. - │ ├── api: RDF Storage And Inference Layer ("Sail") API. - │ ├── base: RDF Storage And Inference Layer ("Sail") API. - │ ├── inferencer: Stackable Sail implementation that adds RDF Schema inferencing to an RDF store. - │ ├── memory: Sail implementation that stores data in main memory, optionally using a dump-restore file for persistence. - │ ├── nativerdf: Sail implementation that stores data directly to disk in dedicated file formats. - │ ├── model: Sail implementation of Model. - │ ├── shacl: Stacked Sail with SHACL validation capabilities - │ ├── lmdb: Sail implementation that stores data to disk using LMDB. - │ ├── lucene-api: StackableSail API offering full-text search on literals, based on Apache Lucene. - │ ├── lucene: StackableSail implementation offering full-text search on literals, based on Apache Lucene. - │ ├── solr: StackableSail implementation offering full-text search on literals, based on Solr. - │ ├── elasticsearch: StackableSail implementation offering full-text search on literals, based on Elastic Search. - │ ├── elasticsearch-store: Store for utilizing Elasticsearch as a triplestore. - │ └── extensible-store: Store that can be extended with a simple user-made backend. - ├── spin: SPARQL input notation interfaces and implementations - ├── client: Parent POM for all RDF4J parsers, APIs and client libraries - ├── storage: Parent POM for all RDF4J storage and inferencing libraries - └── collection-factory: Collection Factories that may be reused for RDF4J - ├── api: Evaluation - ├── mapdb: Evaluation - └── mapdb3: Evaluation -├── tools: Server, Workbench, Console and other end-user tools for RDF4J. - ├── config: RDF4J application configuration classes - ├── console: Command line user interface to RDF4J repositories. 
- ├── federation: A federation engine for virtually integrating SPARQL endpoints - ├── server: HTTP server implementing a REST-style protocol - ├── server-spring: HTTP server implementing a REST-style protocol - ├── workbench: Workbench to interact with RDF4J servers. - ├── runtime: Runtime dependencies for an RDF4J application - └── runtime-osgi: OSGi Runtime dependencies for an RDF4J application -├── spring-components: Components to use with Spring - ├── spring-boot-sparql-web: HTTP server component implementing only the SPARQL protocol - ├── rdf4j-spring: Spring integration for RDF4J - └── rdf4j-spring-demo: Demo of a spring-boot project using an RDF4J repo as its backend -├── testsuites: Test suites for Eclipse RDF4J modules - ├── model: Reusable tests for Model API implementations - ├── rio: Test suite for Rio - ├── queryresultio: Reusable tests for QueryResultIO implementations - ├── sparql: Test suite for the SPARQL query language - ├── repository: Reusable tests for Repository API implementations - ├── sail: Reusable tests for Sail API implementations - ├── lucene: Generic tests for Lucene Sail implementations. - ├── geosparql: Test suite for the GeoSPARQL query language - └── benchmark: RDF4J: benchmarks -├── compliance: Eclipse RDF4J compliance and integration tests - ├── repository: Compliance testing for the Repository API implementations - ├── rio: Tests for parsers and writers of various RDF file formats. - ├── model: RDF4J: Model compliance tests - ├── sparql: Tests for the SPARQL query language implementation - ├── lucene: Compliance Tests for LuceneSail. - ├── solr: Tests for Solr Sail. - ├── elasticsearch: Tests for Elasticsearch. - └── geosparql: Tests for the GeoSPARQL query language implementation -├── examples: Examples and HowTos for use of RDF4J in Java -├── bom: RDF4J Bill of Materials (BOM) -└── assembly: Distribution bundle assembly -``` - -## Safety & Boundaries - -* Don’t commit or push unless explicitly asked. 
-* Don’t add new dependencies without explicit approval. - -It is illegal to `-am` when running tests! -It is illegal to `-q` when running tests! -You must follow these rules and instructions exactly as stated. From b28a528993b7372bed0293d0b672c2c864de65ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 08:43:37 +0200 Subject: [PATCH 16/18] add mor tests --- .../sail/base/SailDatasetImplSizeTest.java | 308 ++++++++++++++++-- 1 file changed, 283 insertions(+), 25 deletions(-) diff --git a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java index 305bee4a313..bc82bf96403 100644 --- a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java +++ b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java @@ -14,15 +14,24 @@ import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.LinkedHashModel; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.sail.SailException; import org.junit.jupiter.api.Test; @@ -88,8 +97,8 @@ public void flush() throws SailException { } @Override - public 
org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; @@ -109,10 +118,10 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { * Backing dataset that returns a concrete set of statements and supports filtering. */ private static final class ListBackedDataset implements SailDataset { - private final java.util.List data; + private final List data; - private ListBackedDataset(java.util.List data) { - this.data = java.util.List.copyOf(data); + private ListBackedDataset(List data) { + this.data = List.copyOf(data); } @Override @@ -137,7 +146,7 @@ public CloseableIteration getContextIDs() throws SailExcepti @Override public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, Resource... contexts) throws SailException { - java.util.stream.Stream stream = data.stream(); + Stream stream = data.stream(); if (subj != null) { stream = stream.filter(st -> subj.equals(st.getSubject())); } @@ -148,10 +157,10 @@ public CloseableIteration getStatements(Resource subj, IRI stream = stream.filter(st -> obj.equals(st.getObject())); } if (contexts != null && contexts.length > 0) { - java.util.Set ctxs = new java.util.HashSet<>(java.util.Arrays.asList(contexts)); + Set ctxs = new HashSet<>(Arrays.asList(contexts)); stream = stream.filter(st -> ctxs.contains(st.getContext())); } - java.util.Iterator it = stream.iterator(); + Iterator it = stream.iterator(); return new CloseableIteratorIteration<>(it); } } @@ -159,15 +168,264 @@ public CloseableIteration getStatements(Resource subj, IRI private static final ValueFactory VF = SimpleValueFactory.getInstance(); private static final Resource CTX_A = VF.createIRI("urn:ctx:A"); private static final Resource CTX_B = VF.createIRI("urn:ctx:B"); + private static final Resource CTX_C = VF.createIRI("urn:ctx:C"); private static final IRI P = VF.createIRI("urn:p"); + private static final IRI Q = 
VF.createIRI("urn:q"); private static Statement st(String s, String o, Resource ctx) { return VF.createStatement(VF.createIRI("urn:s:" + s), P, VF.createIRI("urn:o:" + o), ctx); } + @Test + public void size_afterMultiContextClear_andMixedContextQueries() { + SailDataset backing = new ListBackedDataset(List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B), + st("3", "3", CTX_C) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear two contexts at once (A and B) + changes.clear(CTX_A, CTX_B); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Global should only include C; mixed queries should reflect filters properly + assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "only C remains globally"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "A cleared => empty"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_B), "B cleared => empty"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_C), "C remains"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B, CTX_C), "B cleared, C remains => 1") + ); + } + + @Test + public void size_withSubjPredObjFilters_afterContextClear() { + var s1 = VF.createIRI("urn:s:1"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(s1, P, o1, CTX_A), + VF.createStatement(s1, P, o1, CTX_B), + VF.createStatement(VF.createIRI("urn:s:2"), P, o2, CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + changes.clear(CTX_A); + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(1L, snapshot.size(s1, P, 
null), "s1@A removed, only s1@B remains"), + () -> assertEquals(1L, snapshot.size(null, P, o2), "o2 only in B remains"), + () -> assertEquals(0L, snapshot.size(s1, P, o1, CTX_A), "A cleared => empty for filter"), + () -> assertEquals(1L, snapshot.size(s1, P, o1, CTX_B), "B unaffected => 1 for filter") + ); + } + + @Test + public void size_withFilters_afterGlobalClear_withApprovals() { + var s1 = VF.createIRI("urn:s:1"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(VF.createIRI("urn:s:x"), P, VF.createIRI("urn:o:x"), CTX_A) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + changes.clear(); + changes.approve(VF.createStatement(s1, P, o1, CTX_A)); + changes.approve(VF.createStatement(s1, P, o2, CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(2L, snapshot.size(s1, P, null), "two approved for s1 after global clear"), + () -> assertEquals(1L, snapshot.size(null, P, o1), "only approved o1 remains"), + () -> assertEquals(1L, snapshot.size(null, P, o2), "only approved o2 remains") + ); + } + + @Test + public void size_tripleTerms_afterClear_andApprovals() { + // create a triple value, then use it as subject and object in statements + var ts = VF.createTriple(VF.createIRI("urn:ts:s"), P, VF.createIRI("urn:ts:o")); + var to = VF.createTriple(VF.createIRI("urn:to:s"), P, VF.createIRI("urn:to:o")); + + Statement subjTripleInA = VF.createStatement((Resource) ts, P, VF.createIRI("urn:o:X"), CTX_A); + Statement objTripleInB = VF.createStatement(VF.createIRI("urn:s:Y"), P, to, CTX_B); + + SailDataset backing = new ListBackedDataset(List.of(subjTripleInA, objTripleInB)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException 
{ + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear A, then approve a triple-subject statement in C + changes.clear(CTX_A); + Statement approvedInC = VF.createStatement((Resource) ts, P, VF.createIRI("urn:o:Z"), CTX_C); + changes.approve(approvedInC); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Expected: removed subjTripleInA (cleared); kept objTripleInB; added approvedInC + assertAll( + () -> assertEquals(2L, snapshot.size(null, null, null), "B + approved C"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "A cleared"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B remains"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_C), "approved in C") + ); + } + + @Test + public void size_deprecatedThenApprovedDuplicate_acrossContexts() { + // Same SPO in two contexts in backing + Statement a = st("x", "y", CTX_A); + Statement b = st("x", "y", CTX_B); + SailDataset backing = new ListBackedDataset(List.of(a, b)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Deprecate B from backing, then approve B again + changes.deprecate(b); + changes.approve(b); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Expect both contexts visible, and no double count + assertAll( + () -> assertEquals(2L, snapshot.size(null, null, null), "A and B should both be visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), "A visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B re-approved and visible") + ); + } + + @Test + public void size_contextArrayWithDuplicatesAndNulls() { + // Backing has one in default (null) context and one in A + Statement def = VF.createStatement(VF.createIRI("urn:s:def"), P, VF.createIRI("urn:o:def")); + Statement 
inA = st("a", "a", CTX_A); + SailDataset backing = new ListBackedDataset(List.of(def, inA)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Clear default graph only + changes.clear((Resource) null); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Ask size with duplicate and null contexts in the query + assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "Only A remains globally"), + () -> assertEquals(0L, snapshot.size(null, null, null, (Resource) null), "default graph cleared"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A, (Resource) null, CTX_A), + "duplicates ignored; default graph cleared; A remains") + ); + } + + @Test + public void size_additionalFilterCombinations_predOnly_objOnly_mixed() { + var s1 = VF.createIRI("urn:s:1"); + var s2 = VF.createIRI("urn:s:2"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(s1, P, o1, CTX_A), + VF.createStatement(s2, P, o2, CTX_B), + VF.createStatement(s2, Q, o2, CTX_A) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Clear B, then approve a new P/o2 in A for s2 + changes.clear(CTX_B); + changes.approve(VF.createStatement(s2, P, o2, CTX_A)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + // pred-only across all contexts: s1@A (P/o1), s2@A (P/o2 approved) => 2 + () -> assertEquals(2L, snapshot.size(null, P, null), "two P statements after clear+approve"), + // obj-only: o2 now appears only in A (approved); B was cleared + () -> assertEquals(2L, snapshot.size(null, null, o2), "Q@A + approved P@A have o2"), + // mixed filter: 
s2,P,null => only approved one in A + () -> assertEquals(1L, snapshot.size(s2, P, null), "s2@P only approved in A remains"), + // context filter combos + () -> assertEquals(2L, snapshot.size(null, null, o2, CTX_A), "both P/Q@A with o2 => 2"), + () -> assertEquals(0L, snapshot.size(null, null, o2, CTX_B), "B cleared") + ); + } + @Test public void size_afterGlobalClear_countsApprovedOnly() { - SailDataset backing = new ListBackedDataset(java.util.List.of( + SailDataset backing = new ListBackedDataset(List.of( st("1", "1", CTX_A), st("2", "2", CTX_B) )); @@ -178,8 +436,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; @@ -204,7 +462,7 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { @Test public void size_afterContextClear_excludesClearedContextData() { // backing has 2 in A and 3 in B - SailDataset backing = new ListBackedDataset(java.util.List.of( + SailDataset backing = new ListBackedDataset(List.of( st("1", "1", CTX_A), st("2", "2", CTX_A), st("3", "3", CTX_B), st("4", "4", CTX_B), st("5", "5", CTX_B) )); @@ -215,8 +473,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; @@ -238,7 +496,7 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { @Test public void size_afterContextClear_withApprovedInClearedContext() { // backing has 1 in A and 1 in B - SailDataset backing = new ListBackedDataset(java.util.List.of( + SailDataset backing = new ListBackedDataset(List.of( st("1", "1", CTX_A), st("2", "2", CTX_B) )); @@ -249,8 +507,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - 
return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; @@ -281,8 +539,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; SailDataset snapshot = new SailDatasetImpl(backing, changes); @@ -294,7 +552,7 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { public void size_withDeprecatedStatements_excludesDeprecatedOnes() { Statement a1 = st("1", "1", CTX_A); Statement b1 = st("2", "2", CTX_B); - SailDataset backing = new ListBackedDataset(java.util.List.of(a1, b1)); + SailDataset backing = new ListBackedDataset(List.of(a1, b1)); Changeset changes = new Changeset() { @Override @@ -302,8 +560,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; // deprecate one existing statement @@ -320,7 +578,7 @@ public org.eclipse.rdf4j.model.Model createEmptyModel() { @Test public void size_withApprovedDuplicates_doesNotDoubleCount() { Statement b1 = st("2", "2", CTX_B); - SailDataset backing = new ListBackedDataset(java.util.List.of(b1)); + SailDataset backing = new ListBackedDataset(List.of(b1)); Changeset changes = new Changeset() { @Override @@ -328,8 +586,8 @@ public void flush() throws SailException { } @Override - public org.eclipse.rdf4j.model.Model createEmptyModel() { - return new org.eclipse.rdf4j.model.impl.LinkedHashModel(); + public Model createEmptyModel() { + return new LinkedHashModel(); } }; // approve same statement as in backing From d59c4410a005529d4cabc4c357100a2cf83edf79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 09:07:53 +0200 
Subject: [PATCH 17/18] add more tests --- .../lmdb/LmdbInferredDatasetSizeTest.java | 78 +++++++++++ .../MemoryContextSizeExplicitOnlyTest.java | 109 +++++++++++++++ .../sail/memory/MemoryDatasetSizeTest.java | 125 ++++++++++++++++++ 3 files changed, 312 insertions(+) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java create mode 100644 core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java create mode 100644 core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java new file mode 100644 index 00000000000..421001a7d69 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; + +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.base.SailSink; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Reproduces regression: inferred dataset size() must count inferred statements. The current implementation ignores the + * dataset's explicit/inferred flag and always counts explicit-only, causing inferred datasets to report 0. 
+ */ +public class LmdbInferredDatasetSizeTest { + + private final ValueFactory vf = SimpleValueFactory.getInstance(); + + private LmdbStore store; + + @BeforeEach + public void setUp(@TempDir File dataDir) { + store = new LmdbStore(dataDir); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void inferredDatasetSize_countsInferredStatements() throws Exception { + // Arrange: add one inferred statement via the inferred sink + LmdbSailStore backing = store.getBackingStore(); + SailSource inferred = backing.getInferredSailSource(); + SailSource explicit = backing.getExplicitSailSource(); + + IRI s = vf.createIRI("urn:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + // Act/Assert: inferred dataset sees the inferred statement, explicit does not + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "inferred dataset should count inferred statements"); + } + + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(0L, ds.size(null, null, null), "explicit dataset should not include inferred statements"); + } + } +} diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java new file mode 100644 index 00000000000..dc71cca7cc8 --- /dev/null +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java @@ -0,0 +1,109 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.memory; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.SailConnection; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Mirrors LMDB context-size tests using MemoryStoreConnection, ensuring size(context) and size() count explicit-only + * statements even when inferred statements exist. + */ +public class MemoryContextSizeExplicitOnlyTest { + + private MemoryStore store; + + @BeforeEach + public void setUp() { + store = new MemoryStore(); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void sizeContext_excludesInferred_afterCommit() { + try (SailConnection raw = store.getConnection()) { + MemoryStoreConnection conn = (MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + conn.commit(); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } + + @Test + public void sizeContext_excludesInferred_duringTxn() { + try (SailConnection raw 
= store.getConnection()) { + MemoryStoreConnection conn = (MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + conn.commit(); + } + } + + @Test + public void sizeContext_onlyExplicit() { + try (SailConnection raw = store.getConnection()) { + MemoryStoreConnection conn = (MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.commit(); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } +} diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java new file mode 100644 index 00000000000..fbb9bb37f1d --- /dev/null +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java @@ -0,0 +1,125 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.memory; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.base.SailSink; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Verifies dataset.size() semantics for MemoryStore across explicit, inferred and mixed data. + */ +public class MemoryDatasetSizeTest { + + private MemoryStore store; + + @BeforeEach + public void setUp() { + store = new MemoryStore(); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void explicitDatasetSize_countsOnlyExplicit() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s = vf.createIRI("urn:explicit:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = explicit.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "explicit dataset should count explicit statements"); + } + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(0L, 
ds.size(null, null, null), "inferred dataset should not include explicit statements"); + } + } + + @Test + public void inferredDatasetSize_countsOnlyInferred() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s = vf.createIRI("urn:inferred:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "inferred dataset should count inferred statements"); + } + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(0L, ds.size(null, null, null), "explicit dataset should not include inferred statements"); + } + } + + @Test + public void mixedDatasets_eachCountsOwn_andCombinedSums() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s1 = vf.createIRI("urn:explicit:s"); + IRI s2 = vf.createIRI("urn:inferred:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = explicit.sink(IsolationLevels.NONE)) { + sink.approve(s1, p, o, null); + sink.flush(); + } + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s2, p, o, null); + sink.flush(); + } + + long explicitSize; + long inferredSize; + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + explicitSize = ds.size(null, null, null); + } + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + inferredSize = ds.size(null, null, null); + } + + assertEquals(1L, explicitSize, "explicit dataset should count explicit statements only"); + assertEquals(1L, inferredSize, 
"inferred dataset should count inferred statements only"); + assertEquals(2L, explicitSize + inferredSize, "combined explicit+inferred should sum to total statements"); + } +} From 8443e240d672b6140f7a27dc31e130a46629e268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Oct 2025 09:27:28 +0200 Subject: [PATCH 18/18] fixes --- AGENTS.md | 2 +- .../rdf4j/sail/lmdb/LmdbSailStore.java | 30 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 43824dfe312..d8489a780ab 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -423,7 +423,7 @@ Assertions are executable claims about what must be true. Use **temporary tripwi ## Source File Headers -Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below. The text, spacing, punctuation, URL, and SPDX line must be identical. Replace `${year}` with the correct current year at the time the file is created. +Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below with the exact lines. The text, spacing, punctuation, URL, and SPDX line must be identical. Replace `${year}` with the correct current year at the time the file is created. Hint: get the current year with `date +%Y`. diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 12d1f68f8fd..e218d84aa82 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -416,16 +416,17 @@ CloseableIteration createStatementIterator( /** * Returns the number of statements that match the specified pattern. * - * @param subj The subject of the pattern, or null to indicate a wildcard. - * @param pred The predicate of the pattern, or null to indicate a wildcard. 
- * @param obj The object of the pattern, or null to indicate a wildcard. - * @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is optional. If - * no contexts are supplied the method operates on the entire repository. + * @param subj The subject of the pattern, or null to indicate a wildcard. + * @param pred The predicate of the pattern, or null to indicate a wildcard. + * @param obj The object of the pattern, or null to indicate a wildcard. + * @param includeImplicit Whether to include inferred statements in addition to explicit. + * @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is + * optional. If no contexts are supplied the method operates on the entire repository. * @return The number of statements that match the specified pattern. * @throws SailException If an error occurred while determining the size. */ private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, final Value obj, - final Resource... contexts) + final boolean includeImplicit, final Resource... 
contexts) throws SailException { try { long totalSize = 0; @@ -457,7 +458,8 @@ private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, // Handle context selection mirroring getStatements semantics if (contexts.length == 0) { // wildcard across all contexts - totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID, false); + totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID, + includeImplicit); } else { for (Resource context : contexts) { Long contextIDToCount = null; @@ -473,7 +475,8 @@ private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, } if (contextIDToCount != null) { - totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextIDToCount, false); + totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextIDToCount, + includeImplicit); } } } @@ -1040,7 +1043,16 @@ public Comparator getComparator() { public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) throws SailException { try { - return LmdbSailStore.this.size(txn, subj, pred, obj, contexts); + if (explicit) { + // explicit dataset: count explicit statements only + return LmdbSailStore.this.size(txn, subj, pred, obj, false, contexts); + } else { + // inferred dataset: count inferred-only = (explicit+inferred) - explicit + long total = LmdbSailStore.this.size(txn, subj, pred, obj, true, contexts); + long explicitOnly = LmdbSailStore.this.size(txn, subj, pred, obj, false, contexts); + long inferredOnly = total - explicitOnly; + return inferredOnly >= 0 ? inferredOnly : 0; + } } catch (final Exception e) { throw new SailException(e); }