2222from langchain_core .embeddings import Embeddings
2323from langchain_core .runnables .config import run_in_executor
2424from langchain_core .vectorstores import VectorStore
25- from pymongo import MongoClient , ReplaceOne
25+ from pymongo import MongoClient
2626from pymongo .collection import Collection
2727from pymongo .errors import CollectionInvalid
28+ from pymongo_vectorsearch_utils import bulk_embed_and_insert_texts
2829
2930from langchain_mongodb .index import (
3031 create_vector_search_index ,
@@ -362,11 +363,11 @@ def add_texts(
362363 metadatas_batch .append (metadata )
363364 if (j + 1 ) % batch_size == 0 or size >= 47_000_000 :
364365 if ids :
365- batch_res = self . bulk_embed_and_insert_texts (
366+ batch_res = bulk_embed_and_insert_texts (
366367 texts_batch , metadatas_batch , ids [i : j + 1 ]
367368 )
368369 else :
369- batch_res = self . bulk_embed_and_insert_texts (
370+ batch_res = bulk_embed_and_insert_texts (
370371 texts_batch , metadatas_batch
371372 )
372373 result_ids .extend (batch_res )
@@ -376,13 +377,11 @@ def add_texts(
376377 i = j + 1
377378 if texts_batch :
378379 if ids :
379- batch_res = self . bulk_embed_and_insert_texts (
380+ batch_res = bulk_embed_and_insert_texts (
380381 texts_batch , metadatas_batch , ids [i : j + 1 ]
381382 )
382383 else :
383- batch_res = self .bulk_embed_and_insert_texts (
384- texts_batch , metadatas_batch
385- )
384+ batch_res = bulk_embed_and_insert_texts (texts_batch , metadatas_batch )
386385 result_ids .extend (batch_res )
387386 return result_ids
388387
@@ -419,37 +418,6 @@ def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
419418 docs .append (Document (page_content = text , id = oid_to_str (_id ), metadata = doc ))
420419 return docs
421420
def bulk_embed_and_insert_texts(
    self,
    texts: Union[List[str], Iterable[str]],
    metadatas: Union[List[dict], Generator[dict, Any, Any]],
    ids: Optional[List[str]] = None,
) -> List[str]:
    """Bulk insert a single batch of texts, embeddings, and optionally ids.

    See add_texts for additional details.

    Args:
        texts: The texts to embed and store. Any iterable is accepted; it is
            materialized into a list exactly once.
        metadatas: One metadata dict per text. Its keys are merged into the
            stored document after the text and embedding fields, so a
            metadata key with the same name takes precedence.
        ids: Optional ids, one per text. When omitted (or empty), a new
            ``ObjectId`` string is generated for every text.

    Returns:
        The ids reported by the bulk write as upserted (newly inserted).
        Documents that replaced an already existing ``_id`` are not part of
        ``upserted_ids`` and therefore not returned.

    Raises:
        RuntimeError: If the bulk write result carries no upserted-id
            mapping.
    """
    # Materialize once. ``texts`` may be a one-shot iterator: a truthiness
    # test on it is always True, and iterating it a second time (for the id
    # generation and the zip below) would silently yield nothing.
    texts = list(texts)
    if not texts:
        return []

    # Compute embedding vectors for the whole batch in one call.
    embeddings = self._embedding.embed_documents(texts)

    if not ids:
        ids = [str(ObjectId()) for _ in texts]

    operations = []
    for doc_id, text, metadata, embedding in zip(ids, texts, metadatas, embeddings):
        doc = {
            "_id": str_to_oid(doc_id),
            self._text_key: text,
            self._embedding_key: embedding,
            **metadata,
        }
        # Upsert so that re-adding an existing id replaces the document
        # instead of failing with a duplicate-key error.
        operations.append(ReplaceOne({"_id": doc["_id"]}, doc, upsert=True))

    # Insert the documents in MongoDB Atlas.
    result = self._collection.bulk_write(operations)

    upserted_ids = result.upserted_ids
    # Explicit check instead of ``assert``: asserts are stripped under -O.
    if upserted_ids is None:
        raise RuntimeError("bulk_write did not report any upserted ids")
    return [oid_to_str(_id) for _id in upserted_ids.values()]
453421 def add_documents (
454422 self ,
455423 documents : List [Document ],
@@ -481,7 +449,7 @@ def add_documents(
481449 * [(doc .page_content , doc .metadata ) for doc in documents [start :end ]]
482450 )
483451 result_ids .extend (
484- self . bulk_embed_and_insert_texts (
452+ bulk_embed_and_insert_texts (
485453 texts = texts , metadatas = metadatas , ids = ids [start :end ]
486454 )
487455 )
0 commit comments