|
22 | 22 | from langchain_core.embeddings import Embeddings |
23 | 23 | from langchain_core.runnables.config import run_in_executor |
24 | 24 | from langchain_core.vectorstores import VectorStore |
25 | | -from pymongo import MongoClient, ReplaceOne |
| 25 | +from pymongo import MongoClient |
26 | 26 | from pymongo.collection import Collection |
27 | 27 | from pymongo.errors import CollectionInvalid |
| 28 | +from pymongo_search_utils import bulk_embed_and_insert_texts |
28 | 29 |
|
29 | 30 | from langchain_mongodb.index import ( |
30 | 31 | create_vector_search_index, |
@@ -429,28 +430,15 @@ def bulk_embed_and_insert_texts( |
429 | 430 |
|
430 | 431 | See add_texts for additional details. |
431 | 432 | """ |
432 | | - if not texts: |
433 | | - return [] |
434 | | - # Compute embedding vectors |
435 | | - embeddings = self._embedding.embed_documents(list(texts)) |
436 | | - if not ids: |
437 | | - ids = [str(ObjectId()) for _ in range(len(list(texts)))] |
438 | | - docs = [ |
439 | | - { |
440 | | - "_id": str_to_oid(i), |
441 | | - self._text_key: t, |
442 | | - self._embedding_key: embedding, |
443 | | - **m, |
444 | | - } |
445 | | - for i, t, m, embedding in zip( |
446 | | - ids, texts, metadatas, embeddings, strict=True |
447 | | - ) |
448 | | - ] |
449 | | - operations = [ReplaceOne({"_id": doc["_id"]}, doc, upsert=True) for doc in docs] |
450 | | - # insert the documents in MongoDB Atlas |
451 | | - result = self._collection.bulk_write(operations) |
452 | | - assert result.upserted_ids is not None |
453 | | - return [oid_to_str(_id) for _id in result.upserted_ids.values()] |
| 433 | + return bulk_embed_and_insert_texts( |
| 434 | + texts, |
| 435 | + metadatas, |
| 436 | + self._embedding.embed_documents, |
| 437 | + self._collection, |
| 438 | + self._text_key, |
| 439 | + self._embedding_key, |
| 440 | + ids, |
| 441 | + ) |
454 | 442 |
|
455 | 443 | def add_documents( |
456 | 444 | self, |
|
0 commit comments