Skip to content
33 changes: 33 additions & 0 deletions libs/langchain-mongodb/langchain_mongodb/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,36 @@ def create_vector_search_index(
wait_until_complete=wait_until_complete,
**kwargs,
) # type: ignore [operator]

def similarity_search_by_vector(
    self,
    embedding: list[float],
    k: int = 4,
    **kwargs: Any,
) -> list[Document]:
    """Return the documents closest to an already-computed query vector.

    The search itself is delegated to ``self._similarity_search_with_score``;
    this method only discards the score from each ``(document, score)``
    pair it returns.

    Args:
        embedding: Query embedding vector to search with.
        k: (Optional) number of documents to return. Defaults to 4.
        **kwargs: Forwarded unchanged to ``_similarity_search_with_score``.
            Options documented for this search include:

            pre_filter: List of MQL match expressions comparing an
                indexed field.
            post_filter_pipeline: (Optional) Pipeline of MongoDB
                aggregation stages to filter/process results after
                $vectorSearch.
            oversampling_factor: Multiple of k used when generating the
                number of candidates at each step in the HNSW Vector
                Search.
            include_embeddings: If True, the embedding vector of each
                result will be included in metadata.

    Returns:
        The matching documents, in the order the underlying search
        produced them, without their scores.
    """
    scored_hits = self._similarity_search_with_score(embedding, k=k, **kwargs)

    # Each hit is a (document, score) pair; callers of this method only
    # want the document half.
    documents = []
    for document, _score in scored_hits:
        documents.append(document)
    return documents
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,41 @@ def test_search_pre_filter(
"Sandwich", k=3, pre_filter={"c": {"$gt": 0}}
)
assert len(matches_filter) == 1


def test_similarity_search_by_vector(
    vectorstore: PatchedMongoDBAtlasVectorSearch,
    embeddings: Embeddings,
    texts: List[str],
) -> None:
    """Searching with a raw vector returns ``k`` document-like results."""
    # Turn a query string into the vector that the search takes as input.
    query_vector = embeddings.embed_query("Sandwich")

    output = vectorstore.similarity_search_by_vector(query_vector, k=2)

    # Exactly k results come back ...
    assert len(output) == 2
    # ... and each one exposes the attributes of a Document.
    for attribute in ("page_content", "metadata"):
        assert all(hasattr(doc, attribute) for doc in output)


def test_similarity_search_by_vector_with_filter(
    vectorstore: PatchedMongoDBAtlasVectorSearch,
    embeddings: Embeddings,
) -> None:
    """A ``pre_filter`` passed to the vector search restricts the results."""
    query_vector = embeddings.embed_query("Sandwich")

    # Ask for up to three hits, but only among documents where "c" > 0.
    filtered_output = vectorstore.similarity_search_by_vector(
        query_vector,
        k=3,
        pre_filter={"c": {"$gt": 0}},
    )

    # The test expects exactly one document to pass the filter, and that
    # document must carry the filtered field in its metadata.
    assert len(filtered_output) == 1
    sole_match = filtered_output[0]
    assert "c" in sole_match.metadata
Loading