55import glob
66
77# general purpose imports
8+ import json
89import logging
910import os
1011
@@ -36,7 +37,7 @@ def create_documents(self, texts):
3637 return documents
3738
3839
39- class PineConeIndex :
40+ class PineconeIndex :
4041 """Pinecone helper class."""
4142
4243 _index : pinecone .Index = None
@@ -46,13 +47,42 @@ class PineConeIndex:
4647 _vector_store : LCPinecone = None
4748
def __init__(self, index_name: str = None):
    """Initialize Pinecone and bind this helper to an index name.

    index_name: name of the index to work with. A falsy value (the
        default ``None``) falls back to ``Config.PINECONE_INDEX_NAME``.
    """
    # init() resets the private cached attributes (including _index_name),
    # so it runs before the index_name setter is used.
    self.init()
    chosen_name = index_name if index_name else Config.PINECONE_INDEX_NAME
    self.index_name = chosen_name
52+
@property
def index_name(self) -> str:
    """Return the index name currently stored on this instance."""
    current_name = self._index_name
    return current_name
57+
@index_name.setter
def index_name(self, value: str) -> None:
    """Set the index name, re-initializing when it actually changes.

    Assigning the name that is already stored does nothing.
    """
    if self._index_name == value:
        return
    # init() clears the cached private attributes before the new name is
    # stored, so nothing cached under the previous name is kept.
    self.init()
    self._index_name = value
    # Create the index in Pinecone if it is not listed yet.
    self.init_index()
65+
@property
def index(self) -> pinecone.Index:
    """Return the ``pinecone.Index`` for ``self.index_name``, built on first use."""
    if self._index is not None:
        return self._index
    # Make sure the index exists in Pinecone before constructing the handle.
    self.init_index()
    self._index = pinecone.Index(index_name=self.index_name)
    return self._index
73+
@property
def initialized(self) -> bool:
    """Return True when ``self.index_name`` is among the indexes Pinecone lists."""
    return self.index_name in pinecone.manage.list_indexes()
5179
5280 @property
5381 def vector_store (self ) -> LCPinecone :
5482 """Pinecone lazy read-only property."""
5583 if self ._vector_store is None :
84+ if not self .initialized :
85+ self .init_index ()
5686 self ._vector_store = LCPinecone (
5787 index = self .index ,
5888 embedding = self .openai_embeddings ,
@@ -76,41 +106,29 @@ def text_splitter(self) -> TextSplitter:
76106 self ._text_splitter = TextSplitter ()
77107 return self ._text_splitter
78108
79- @property
80- def index_name (self ) -> str :
81- """index name."""
82- return self ._index_name
83-
84- @index_name .setter
85- def index_name (self , value : str ) -> None :
86- """Set index name."""
87- if self ._index_name != value :
88- self ._index_name = value
89- self .initialize ()
90-
91- @property
92- def index (self ) -> pinecone .Index :
93- """pinecone.Index lazy read-only property."""
94- if self ._index is None :
95- try :
96- self ._index = pinecone .Index (index_name = self .index_name )
97- except pinecone .exceptions .PineconeException :
98- # index does not exist, so create it.
99- self .create ()
100- self ._index = pinecone .Index (index_name = self .index_name )
101- return self ._index
def init_index(self):
    """Create the index named ``self.index_name`` if Pinecone does not list it."""
    existing = pinecone.manage.list_indexes()
    if self.index_name in existing:
        return
    logging.info("Index does not exist.")
    self.create()
102115
def init(self):
    """Initialize the Pinecone client and reset the cached private attributes."""
    pinecone.init(
        api_key=Credentials.PINECONE_API_KEY,
        environment=Config.PINECONE_ENVIRONMENT,
    )
    # Reset each private attribute to None, in the same order as before.
    self._index = self._index_name = None
    self._text_splitter = self._openai_embeddings = self._vector_store = None
106124
def delete(self):
    """Delete the index named ``self.index_name`` when Pinecone lists it.

    If the index is not listed, only an informational message is logged.
    """
    if self.initialized:
        logging.info("Deleting index...")
        pinecone.delete_index(self.index_name)
    else:
        logging.info("Index does not exist. Nothing to delete.")
114132
115133 def create (self ):
116134 """Create index."""
@@ -121,11 +139,12 @@ def create(self):
121139 logging .info ("Creating index. This may take a few minutes..." )
122140
123141 pinecone .create_index (
124- self .index_name ,
142+ name = self .index_name ,
125143 dimension = Config .PINECONE_DIMENSIONS ,
126144 metric = Config .PINECONE_METRIC ,
127145 metadata_config = metadata_config ,
128146 )
147+ logging .info ("Index created." )
129148
130149 def initialize (self ):
131150 """Initialize index."""
@@ -152,16 +171,17 @@ def pdf_loader(self, filepath: str):
152171 for pdf_file in pdf_files :
153172 i += 1
154173 j = len (pdf_files )
155- logging . info ("Loading PDF %s of %s: %s" , i , j , pdf_file )
174+ print ("Loading PDF %s of %s: %s" , i , j , pdf_file )
156175 loader = PyPDFLoader (file_path = pdf_file )
157176 docs = loader .load ()
158177 k = 0
159178 for doc in docs :
160179 k += 1
161- logging . info (k * "-" , end = "\r " )
180+ print (k * "-" , end = "\r " )
162181 documents = self .text_splitter .create_documents ([doc .page_content ])
163182 document_texts = [doc .page_content for doc in documents ]
164183 embeddings = self .openai_embeddings .embed_documents (document_texts )
165184 self .vector_store .add_documents (documents = documents , embeddings = embeddings )
166185
167- logging .info ("Finished loading PDFs" )
186+ index_stats_string = json .dumps (self .index .describe_index_stats (), indent = 4 )
187+ print ("Finished loading PDFs. \n " + index_stats_string )
0 commit comments