@@ -59,9 +59,15 @@ class SalesSupportModel:
5959 )
6060
6161 # embeddings
62- texts_splitter_results : List [Document ]
62+ text_splitter = RecursiveCharacterTextSplitter (
63+ chunk_size = 100 ,
64+ chunk_overlap = 0 ,
65+ )
6366 openai_embedding = OpenAIEmbeddings ()
64- query_result : List [float ]
67+ pinecone_search = Pinecone .from_existing_index (
68+ Credentials .PINECONE_INDEX_NAME ,
69+ embedding = openai_embedding ,
70+ )
6571
6672 def cached_chat_request (self , system_message : str , human_message : str ) -> SystemMessage :
6773 """Cached chat request."""
@@ -87,23 +93,6 @@ def split_text(self, text: str) -> List[Document]:
8793 retval = text_splitter .create_documents ([text ])
8894 return retval
8995
90- def embed (self , text : str ) -> List [float ]:
91- """Embed."""
92- text_splitter = RecursiveCharacterTextSplitter (
93- chunk_size = 100 ,
94- chunk_overlap = 0 ,
95- )
96- texts_splitter_results = text_splitter .create_documents ([text ])
97- embedding = texts_splitter_results [0 ].page_content
98- # pylint: disable=no-member
99- self .openai_embedding .embed_query (embedding )
100-
101- Pinecone .from_documents (
102- documents = texts_splitter_results ,
103- embedding = self .openai_embedding ,
104- index_name = Credentials .PINECONE_INDEX_NAME ,
105- )
106-
10796 def load (self , filepath : str ):
10897 """
10998 Embed PDF.
@@ -118,14 +107,20 @@ def load(self, filepath: str):
118107 for pdf_file in pdf_files :
119108 i += 1
120109 j = len (pdf_files )
121- print (f"Loading PDF { i } of { j } : " )
110+ print (f"Loading PDF { i } of { j } : " , pdf_file )
122111 loader = PyPDFLoader (file_path = pdf_file )
123112 docs = loader .load ()
124113 k = 0
125114 for doc in docs :
126115 k += 1
127116 print (k * "-" , end = "\r " )
128- self .embed (doc .page_content )
117+ texts_splitter_results = self .text_splitter .create_documents ([doc .page_content ])
118+ self .pinecone_search .from_existing_index (
119+ index_name = Credentials .PINECONE_INDEX_NAME ,
120+ embedding = self .openai_embedding ,
121+ text_key = texts_splitter_results ,
122+ )
123+
129124 print ("Finished loading PDFs" )
130125
131126 def rag (self , prompt : str ):
@@ -142,11 +137,7 @@ def format_docs(docs):
142137 """Format docs."""
143138 return "\n \n " .join (doc .page_content for doc in docs )
144139
145- pinecone_search = Pinecone .from_existing_index (
146- Credentials .PINECONE_INDEX_NAME ,
147- embedding = self .openai_embedding ,
148- )
149- retriever = pinecone_search .as_retriever ()
140+ retriever = self .pinecone_search .as_retriever ()
150141
151142 # Use the retriever to get relevant documents
152143 documents = retriever .get_relevant_documents (query = prompt )
0 commit comments