Skip to content

Commit 5a65613

Browse files
committed
Refine Cassandra Vector Search documentation.
Add metadata prefiltering. Update documentation to reflect Cassandra specifics regarding sorting. Closes #1608
1 parent f36bc6d commit 5a65613

File tree

5 files changed

+30
-31
lines changed

5 files changed

+30
-31
lines changed

spring-data-cassandra/src/test/java/org/springframework/data/cassandra/core/CassandraVectorSearchIntegrationTests.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.springframework.data.cassandra.core;
1717

1818
import static org.assertj.core.api.Assertions.*;
19+
import static org.springframework.data.cassandra.core.query.Criteria.*;
1920

2021
import java.util.List;
2122
import java.util.UUID;
@@ -86,16 +87,19 @@ void shouldQueryVector() {
8687

8788
Comments one = new Comments();
8889
one.setId(UUID.randomUUID());
90+
one.setLanguage("en");
8991
one.setVector(Vector.of(0.45f, 0.09f, 0.01f, 0.2f, 0.11f));
9092
one.setComment("Raining too hard should have postponed");
9193

9294
Comments two = new Comments();
9395
two.setId(UUID.randomUUID());
96+
two.setLanguage("en");
9497
two.setVector(Vector.of(0.99f, 0.5f, 0.99f, 0.1f, 0.34f));
9598
two.setComment("Second rest stop was out of water");
9699

97100
Comments three = new Comments();
98101
three.setId(UUID.randomUUID());
102+
three.setLanguage("en");
99103
three.setVector(Vector.of(0.9f, 0.54f, 0.12f, 0.1f, 0.95f));
100104
three.setComment("LATE RIDERS SHOULD NOT DELAY THE START");
101105

@@ -107,7 +111,7 @@ void shouldQueryVector() {
107111

108112
Columns columns = Columns.empty().include("comment").select("vector",
109113
it -> it.similarity(vector).cosine().as("similarity"));
110-
Query query = Query.select(columns).limit(3).sort(VectorSort.ann("vector", vector));
114+
Query query = Query.select(columns).and(where("language").is("en")).limit(3).sort(VectorSort.ann("vector", vector));
111115

112116
List<CommentSearch> result = template.query(Comments.class).as(CommentSearch.class).matching(query).all();
113117

@@ -134,6 +138,7 @@ static class Comments {
134138

135139
@Id UUID id;
136140
String comment;
141+
@SaiIndexed String language;
137142

138143
@VectorType(dimensions = 5)
139144
@SaiIndexed Vector vector;
@@ -154,6 +159,14 @@ public void setComment(String comment) {
154159
this.comment = comment;
155160
}
156161

162+
public String getLanguage() {
163+
return language;
164+
}
165+
166+
public void setLanguage(String language) {
167+
this.language = language;
168+
}
169+
157170
public Vector getVector() {
158171
return vector;
159172
}

spring-data-cassandra/src/test/java/org/springframework/data/cassandra/repository/VectorSearchIntegrationTests.java

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -92,28 +92,28 @@ void setUp() {
9292
repository.saveAll(List.of(w1, w2, w3, w4));
9393
}
9494

95-
@Test // GH-
95+
@Test // GH-1573
9696
void searchWithoutScoringFunctionShouldFail() {
9797
assertThatExceptionOfType(QueryCreationException.class)
9898
.isThrownBy(() -> repository.searchByEmbeddingNear(VECTOR, Limit.of(100)));
9999
}
100100

101-
@Test // GH-
101+
@Test // GH-1573
102102
void shouldConsiderScoringFunction() {
103103

104-
SearchResults<WithVectorFields> results = repository.searchByEmbeddingNear(VECTOR,
105-
ScoringFunction.dotProduct(), Limit.of(100));
104+
SearchResults<WithVectorFields> results = repository.searchByEmbeddingNearAndCountry(VECTOR,
105+
ScoringFunction.dotProduct(), "de", Limit.of(100));
106106

107-
assertThat(results).hasSize(4);
107+
assertThat(results).hasSize(3);
108108

109109
for (SearchResult<WithVectorFields> result : results) {
110110
assertThat(result.getScore()).isInstanceOf(Similarity.class);
111111
assertThat(result.getScore().getValue()).isNotCloseTo(0d, offset(0.1d));
112112
}
113113

114-
results = repository.searchByEmbeddingNear(VECTOR, VectorScoringFunctions.EUCLIDEAN, Limit.of(100));
114+
results = repository.searchByEmbeddingNearAndCountry(VECTOR, VectorScoringFunctions.EUCLIDEAN, "de", Limit.of(100));
115115

116-
assertThat(results).hasSize(4);
116+
assertThat(results).hasSize(3);
117117

118118
for (SearchResult<WithVectorFields> result : results) {
119119

@@ -122,7 +122,7 @@ void shouldConsiderScoringFunction() {
122122
}
123123
}
124124

125-
@Test // GH-
125+
@Test // GH-1573
126126
void shouldRunAnnotatedSearchByVector() {
127127

128128
SearchResults<WithVectorFields> results = repository.searchAnnotatedByEmbeddingNear(VECTOR, Limit.of(100));
@@ -134,7 +134,7 @@ void shouldRunAnnotatedSearchByVector() {
134134
}
135135
}
136136

137-
@Test // GH-
137+
@Test // GH-1573
138138
void shouldFindByVector() {
139139

140140
List<WithVectorFields> result = repository.findByEmbeddingNear(VECTOR, Limit.of(100));
@@ -144,7 +144,8 @@ void shouldFindByVector() {
144144

145145
interface VectorSearchRepository extends CrudRepository<WithVectorFields, UUID> {
146146

147-
SearchResults<WithVectorFields> searchByEmbeddingNear(Vector embedding, ScoringFunction function, Limit limit);
147+
SearchResults<WithVectorFields> searchByEmbeddingNearAndCountry(Vector embedding, ScoringFunction function,
148+
String country, Limit limit);
148149

149150
SearchResults<WithVectorFields> searchByEmbeddingNear(Vector embedding, Limit limit);
150151

@@ -159,7 +160,7 @@ interface VectorSearchRepository extends CrudRepository<WithVectorFields, UUID>
159160
static class WithVectorFields {
160161

161162
@Id String id;
162-
String country;
163+
@SaiIndexed String country;
163164
String description;
164165

165166
@VectorType(dimensions = 5)

src/main/antora/modules/ROOT/partials/vector-search-model-include.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
class Comment {
55
66
@Id String id;
7+
@SaiIndexed
78
String country;
89
String comment;
910

src/main/antora/modules/ROOT/partials/vector-search-repository-include.adoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
----
55
interface CommentRepository extends Repository<Comment, String> {
66
7-
SearchResults<Comment> searchByEmbeddingNear(Vector vector, ScoringFunction function, Limit limit);
7+
SearchResults<Comment> searchByEmbeddingNearAndCountry(Vector vector, ScoringFunction function, String country, Limit limit);
88
99
}
1010
11-
SearchResults<Comment> results = repository.searchByEmbeddingNear(Vector.of(…), ScoringFunction.cosine(), Limit.of(10));
11+
SearchResults<Comment> results = repository.searchByEmbeddingNearAndCountry(Vector.of(…), ScoringFunction.cosine(), "…", Limit.of(10));
1212
----
1313
====

src/main/antora/modules/ROOT/partials/vector-search.adoc

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -147,21 +147,5 @@ If an annotated query does not define e.g. the score, then the score value in th
147147
[[vector-search.method.sorting]]
148148
=== Sorting
149149

150-
By default, search results are ordered according to their score.
151-
You can override sorting by using the `Sort` parameter:
150+
Cassandra vector search results are always ordered according to their score.
152151

153-
.Using `Sort` in Repository Search Methods
154-
====
155-
[source,java]
156-
----
157-
interface CommentRepository extends Repository<Comment, String> {
158-
159-
SearchResults<Comment> searchByEmbeddingNearOrderByCountry(Vector vector, Score score);
160-
161-
SearchResults<Comment> searchByEmbeddingWithin(Vector vector, Score score, Sort sort);
162-
}
163-
----
164-
====
165-
166-
Please note that custom sorting does not allow expressing the score as a sorting criteria.
167-
You can only refer to domain properties.

0 commit comments

Comments
 (0)