Skip to content

Commit 5d199e2

Browse files
committed
OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat.
- Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com>
1 parent ad93613 commit 5d199e2

File tree

3 files changed

+413
-2
lines changed

3 files changed

+413
-2
lines changed

vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@
102102
* }</pre>
103103
*
104104
* <p>
105+
* AWS OpenSearch Serverless usage example:
106+
* </p>
107+
* <pre>{@code
108+
* OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
109+
* .initializeSchema(true)
110+
* .manageDocumentIds(false) // Required for AWS OpenSearch Serverless
111+
* .build();
112+
* }</pre>
113+
*
114+
* <p>
105115
* Advanced configuration example:
106116
* </p>
107117
* <pre>{@code
@@ -170,6 +180,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem
170180

171181
private String similarityFunction;
172182

183+
private final boolean manageDocumentIds;
184+
173185
/**
174186
* Creates a new OpenSearchVectorStore using the builder pattern.
175187
* @param builder The configured builder instance
@@ -187,6 +199,7 @@ protected OpenSearchVectorStore(Builder builder) {
187199
// https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces
188200
this.similarityFunction = builder.similarityFunction;
189201
this.initializeSchema = builder.initializeSchema;
202+
this.manageDocumentIds = builder.manageDocumentIds;
190203
}
191204

192205
/**
@@ -210,14 +223,27 @@ public void doAdd(List<Document> documents) {
210223
for (Document document : documents) {
211224
OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
212225
document.getMetadata(), embedding.get(documents.indexOf(document)));
213-
bulkRequestBuilder.operations(op -> op
214-
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
226+
227+
// Conditionally set document ID based on manageDocumentIds flag
228+
if (this.manageDocumentIds) {
229+
bulkRequestBuilder.operations(op -> op
230+
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
231+
}
232+
else {
233+
bulkRequestBuilder
234+
.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
235+
}
215236
}
216237
bulkRequest(bulkRequestBuilder.build());
217238
}
218239

219240
@Override
220241
public void doDelete(List<String> idList) {
242+
if (!this.manageDocumentIds) {
243+
logger.warn("Document ID management is disabled. Delete operations may not work as expected "
244+
+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
245+
}
246+
221247
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
222248
for (String id : idList) {
223249
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
@@ -417,6 +443,8 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {
417443

418444
private String similarityFunction = COSINE_SIMILARITY_FUNCTION;
419445

446+
// Defaults to true so that explicit document IDs are sent unless the caller opts out
// (e.g. for AWS OpenSearch Serverless), matching the documented builder default.
private boolean manageDocumentIds = true;
447+
420448
/**
421449
* Sets the OpenSearch client.
422450
* @param openSearchClient The OpenSearch client to use
@@ -488,6 +516,28 @@ public Builder similarityFunction(String similarityFunction) {
488516
return this;
489517
}
490518

519+
/**
520+
* Sets whether to manage document IDs during indexing operations.
521+
* <p>
522+
* When set to {@code true} (default), document IDs will be explicitly set during
523+
* indexing operations. When set to {@code false}, OpenSearch will auto-generate
524+
* document IDs, which is required for AWS OpenSearch Serverless vector search
525+
* collections.
526+
* </p>
527+
* <p>
528+
* Note: When document ID management is disabled, the {@link #doDelete(List)}
529+
* method may not work as expected since document IDs are auto-generated by
530+
* OpenSearch.
531+
* </p>
532+
* @param manageDocumentIds true to manage document IDs (default), false to let
533+
* OpenSearch auto-generate IDs
534+
* @return The builder instance
535+
*/
536+
public Builder manageDocumentIds(boolean manageDocumentIds) {
537+
this.manageDocumentIds = manageDocumentIds;
538+
return this;
539+
}
540+
491541
/**
492542
* Builds a new OpenSearchVectorStore instance with the configured properties.
493543
* @return A new OpenSearchVectorStore instance

vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreIT.java

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,161 @@ void getNativeClientTest() {
564564
});
565565
}
566566

567+
@ParameterizedTest(name = "manageDocumentIds={0}")
568+
@ValueSource(booleans = { true, false })
569+
void testManageDocumentIdsSetting(boolean manageDocumentIds) {
570+
getContextRunner().run(context -> {
571+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
572+
573+
// Create a new vector store with specific manageDocumentIds setting
574+
OpenSearchVectorStore testVectorStore = OpenSearchVectorStore
575+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
576+
context.getBean(EmbeddingModel.class))
577+
.manageDocumentIds(manageDocumentIds)
578+
.index("test_manage_document_ids_" + manageDocumentIds)
579+
.initializeSchema(true)
580+
.build();
581+
582+
// Test documents
583+
List<Document> testDocuments = List.of(new Document("doc1", "Test content 1", Map.of("key1", "value1")),
584+
new Document("doc2", "Test content 2", Map.of("key2", "value2")));
585+
586+
// Add documents
587+
testVectorStore.add(testDocuments);
588+
589+
// Wait for indexing
590+
Awaitility.await()
591+
.until(() -> testVectorStore
592+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build()), hasSize(2));
593+
594+
// Search and verify results
595+
List<Document> results = testVectorStore
596+
.similaritySearch(SearchRequest.builder().query("Test content").topK(2).build());
597+
598+
assertThat(results).hasSize(2);
599+
600+
// Verify document content and metadata are preserved
601+
assertThat(results.stream().map(Document::getText).toList()).containsExactlyInAnyOrder("Test content 1",
602+
"Test content 2");
603+
604+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key1")).toList()).contains("value1");
605+
assertThat(results.stream().map(doc -> doc.getMetadata().get("key2")).toList()).contains("value2");
606+
607+
// Clean up
608+
testVectorStore.delete(testDocuments.stream().map(Document::getId).toList());
609+
});
610+
}
611+
612+
@Test
613+
void testManageDocumentIdsFalseForAWSOpenSearchServerless() {
614+
getContextRunner().run(context -> {
615+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
616+
617+
// Create vector store with manageDocumentIds=false (AWS OpenSearch Serverless
618+
// mode)
619+
OpenSearchVectorStore awsCompatibleVectorStore = OpenSearchVectorStore
620+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
621+
context.getBean(EmbeddingModel.class))
622+
.manageDocumentIds(false)
623+
.index("test_aws_serverless_compatible")
624+
.initializeSchema(true)
625+
.build();
626+
627+
// Test documents with IDs (these should be ignored when
628+
// manageDocumentIds=false)
629+
List<Document> testDocuments = List.of(
630+
new Document("custom-id-1", "AWS Serverless content 1", Map.of("env", "aws-serverless")),
631+
new Document("custom-id-2", "AWS Serverless content 2", Map.of("env", "aws-serverless")));
632+
633+
// Add documents - should work without explicit document ID errors
634+
awsCompatibleVectorStore.add(testDocuments);
635+
636+
// Wait for indexing
637+
Awaitility.await()
638+
.until(() -> awsCompatibleVectorStore
639+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build()), hasSize(2));
640+
641+
// Search and verify results
642+
List<Document> results = awsCompatibleVectorStore
643+
.similaritySearch(SearchRequest.builder().query("AWS Serverless").topK(2).build());
644+
645+
assertThat(results).hasSize(2);
646+
647+
// Verify content is preserved
648+
assertThat(results.stream().map(Document::getText).toList())
649+
.containsExactlyInAnyOrder("AWS Serverless content 1", "AWS Serverless content 2");
650+
651+
// Verify metadata is preserved
652+
assertThat(results.stream().map(doc -> doc.getMetadata().get("env")).toList())
653+
.containsOnly("aws-serverless");
654+
655+
// Clean up
656+
awsCompatibleVectorStore.delete(List.of("_all"));
657+
});
658+
}
659+
660+
@Test
661+
void testManageDocumentIdsTrueWithExplicitIds() {
662+
getContextRunner().run(context -> {
663+
OpenSearchVectorStore vectorStore = context.getBean("vectorStore", OpenSearchVectorStore.class);
664+
665+
// Create vector store with manageDocumentIds=true (default behavior)
666+
OpenSearchVectorStore explicitIdVectorStore = OpenSearchVectorStore
667+
.builder((OpenSearchClient) vectorStore.getNativeClient().orElseThrow(),
668+
context.getBean(EmbeddingModel.class))
669+
.manageDocumentIds(true)
670+
.index("test_explicit_ids")
671+
.initializeSchema(true)
672+
.build();
673+
674+
// Test documents with specific IDs
675+
List<Document> testDocuments = List.of(
676+
new Document("explicit-id-1", "Explicit ID content 1", Map.of("type", "explicit")),
677+
new Document("explicit-id-2", "Explicit ID content 2", Map.of("type", "explicit")));
678+
679+
// Add documents
680+
explicitIdVectorStore.add(testDocuments);
681+
682+
// Wait for indexing
683+
Awaitility.await()
684+
.until(() -> explicitIdVectorStore
685+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(2));
686+
687+
// Search and verify results
688+
List<Document> results = explicitIdVectorStore
689+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
690+
691+
assertThat(results).hasSize(2);
692+
693+
// Verify document IDs are preserved
694+
assertThat(results.stream().map(Document::getId).toList()).containsExactlyInAnyOrder("explicit-id-1",
695+
"explicit-id-2");
696+
697+
// Verify content and metadata
698+
assertThat(results.stream().map(Document::getText).toList())
699+
.containsExactlyInAnyOrder("Explicit ID content 1", "Explicit ID content 2");
700+
701+
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).toList()).containsOnly("explicit");
702+
703+
// Test deletion by specific IDs
704+
explicitIdVectorStore.delete(List.of("explicit-id-1"));
705+
706+
Awaitility.await()
707+
.until(() -> explicitIdVectorStore
708+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build()), hasSize(1));
709+
710+
// Verify only one document remains
711+
results = explicitIdVectorStore
712+
.similaritySearch(SearchRequest.builder().query("Explicit ID").topK(2).build());
713+
714+
assertThat(results).hasSize(1);
715+
assertThat(results.get(0).getId()).isEqualTo("explicit-id-2");
716+
717+
// Clean up
718+
explicitIdVectorStore.delete(List.of("explicit-id-2"));
719+
});
720+
}
721+
567722
@SpringBootConfiguration
568723
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
569724
public static class TestApplication {

0 commit comments

Comments
 (0)