diff --git a/libs/langchain-mongodb/langchain_mongodb/vectorstores.py b/libs/langchain-mongodb/langchain_mongodb/vectorstores.py
index 15b593b9..9437150e 100644
--- a/libs/langchain-mongodb/langchain_mongodb/vectorstores.py
+++ b/libs/langchain-mongodb/langchain_mongodb/vectorstores.py
@@ -870,3 +870,36 @@ def create_vector_search_index(
             wait_until_complete=wait_until_complete,
             **kwargs,
         )  # type: ignore [operator]
+
+    def similarity_search_by_vector(
+        self,
+        embedding: list[float],
+        k: int = 4,
+        **kwargs: Any,
+    ) -> list[Document]:
+        """Return MongoDB documents most similar to the given query vector.
+
+        Atlas Vector Search eliminates the need to run a separate
+        search system alongside your database.
+
+        Args:
+            embedding: Embedding vector to search for.
+            k: (Optional) number of documents to return. Defaults to 4.
+            pre_filter: List of MQL match expressions comparing an indexed field
+            post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
+                to filter/process results after $vectorSearch.
+            oversampling_factor: Multiple of k used when generating number of candidates
+                at each step in the HNSW Vector Search.
+            include_embeddings: If True, the embedding vector of each result
+                will be included in metadata.
+            kwargs: Additional arguments are specific to the search_type
+
+        Returns:
+            List of documents most similar to the query vector.
+        """
+        tuple_list = self._similarity_search_with_score(
+            embedding,
+            k=k,
+            **kwargs,
+        )
+        return [doc for doc, _ in tuple_list]
diff --git a/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py b/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py
index 4266f4e0..59b931dc 100644
--- a/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py
+++ b/libs/langchain-mongodb/tests/integration_tests/test_vectorstore_from_texts.py
@@ -117,3 +117,41 @@ def test_search_pre_filter(
         "Sandwich", k=3, pre_filter={"c": {"$gt": 0}}
     )
     assert len(matches_filter) == 1
+
+
+def test_similarity_search_by_vector(
+    vectorstore: PatchedMongoDBAtlasVectorSearch,
+    embeddings: Embeddings,
+    texts: List[str],
+) -> None:
+    # Test similarity_search_by_vector method
+    # First, embed a query text to get a vector
+    query_text = "Sandwich"
+    query_vector = embeddings.embed_query(query_text)
+
+    # Perform search by vector
+    output = vectorstore.similarity_search_by_vector(query_vector, k=2)
+
+    # Should return results
+    assert len(output) == 2
+    # Results should be Document objects
+    assert all(hasattr(doc, "page_content") for doc in output)
+    assert all(hasattr(doc, "metadata") for doc in output)
+
+
+def test_similarity_search_by_vector_with_filter(
+    vectorstore: PatchedMongoDBAtlasVectorSearch,
+    embeddings: Embeddings,
+) -> None:
+    # Test similarity_search_by_vector with pre_filter
+    query_text = "Sandwich"
+    query_vector = embeddings.embed_query(query_text)
+
+    # Search with filter
+    filtered_output = vectorstore.similarity_search_by_vector(
+        query_vector, k=3, pre_filter={"c": {"$gt": 0}}
+    )
+
+    # Should only return documents matching the filter
+    assert len(filtered_output) == 1
+    assert "c" in filtered_output[0].metadata