33from urllib .parse import urlparse
44from uuid import uuid4
55import datetime
6+ import json
67import pandas as pd
78
89from mindsdb_sdk .databases import Databases
9- from mindsdb_sdk .knowledge_bases import KnowledgeBases
10- from mindsdb_sdk .models import Model
10+ from mindsdb_sdk .knowledge_bases import KnowledgeBase , KnowledgeBases
11+ from mindsdb_sdk .ml_engines import MLEngines
12+ from mindsdb_sdk .models import Model , Models
1113from mindsdb_sdk .skills import Skill , Skills
1214from mindsdb_sdk .utils .objects_collection import CollectionBase
1315
@@ -79,6 +81,14 @@ def __init__(
def completion(self, messages: List[dict]) -> AgentCompletion:
    """
    Request a completion from this agent.

    Delegates to the owning collection, passing this agent's name.

    :param messages: List of message dicts, forwarded unchanged.
    :return: Whatever the collection's completion() returns for this agent.
    """
    collection = self.collection
    return collection.completion(self.name, messages)
8183
def add_files(self, file_paths: List[str], description: str, knowledge_base: str = None):
    """
    Add a list of files to the agent for retrieval.

    Delegates to the owning collection, passing this agent's name.

    :param file_paths: List of paths to the files to be added.
    :param description: Description forwarded to the collection together with the files.
    :param knowledge_base: Knowledge base name forwarded to the collection (None by default).
    """
    collection = self.collection
    collection.add_files(self.name, file_paths, description, knowledge_base)
91+
8292 def add_file (self , file_path : str , description : str , knowledge_base : str = None ):
8393 """
8494 Add a file to the agent for retrieval.
@@ -87,6 +97,14 @@ def add_file(self, file_path: str, description: str, knowledge_base: str = None)
8797 """
8898 self .collection .add_file (self .name , file_path , description , knowledge_base )
8999
def add_webpages(self, urls: List[str], description: str, knowledge_base: str = None):
    """
    Add a list of crawled URLs to the agent for retrieval.

    Delegates to the owning collection, passing this agent's name.

    :param urls: List of URLs to be crawled and added.
    :param description: Description forwarded to the collection together with the URLs.
    :param knowledge_base: Knowledge base name forwarded to the collection (None by default).
    """
    collection = self.collection
    collection.add_webpages(self.name, urls, description, knowledge_base)
107+
90108 def add_webpage (self , url : str , description : str , knowledge_base : str = None ):
91109 """
92110 Add a crawled URL to the agent for retrieval.
@@ -95,6 +113,16 @@ def add_webpage(self, url: str, description: str, knowledge_base: str = None):
95113 """
96114 self .collection .add_webpage (self .name , url , description , knowledge_base )
97115
def add_database(self, database: str, tables: List[str], description: str):
    """
    Add a database to the agent for retrieval.

    Delegates to the owning collection, passing this agent's name.

    :param database: Name of the database to be added.
    :param tables: List of tables to be added.
    :param description: Description of the database tables. Used by the agent to know when to use SQL skill.
    """
    collection = self.collection
    collection.add_database(self.name, database, tables, description)
125+
def __repr__(self):
    """Return the class name followed by the agent's name in parentheses."""
    class_name = self.__class__.__name__
    return f'{class_name}(name: {self.name})'
100128
@@ -126,12 +154,14 @@ def from_json(cls, json: dict, collection: CollectionBase):
126154
127155class Agents (CollectionBase ):
128156 """Collection for agents"""
129- def __init__ (self , api , project : str , knowledge_bases : KnowledgeBases , databases : Databases , skills : Skills = None ):
def __init__(
        self,
        api,
        project: str,
        knowledge_bases: KnowledgeBases,
        databases: Databases,
        models: Models,
        ml_engines: MLEngines,
        skills: Skills = None):
    """
    Keep references to the API client, the project name and the sibling collections.

    :param api: API client object stored on the collection
    :param project: Name of the project
    :param knowledge_bases: Knowledge bases collection
    :param databases: Databases collection
    :param models: Models collection
    :param ml_engines: ML engines collection
    :param skills: Skills collection. When falsy, a new Skills(api, project) is created.
    """
    self.api = api
    self.project = project
    if not skills:
        # No skills collection supplied: build one for the same API and project.
        skills = Skills(self.api, project)
    self.skills = skills
    self.databases = databases
    self.knowledge_bases = knowledge_bases
    self.ml_engines = ml_engines
    self.models = models
135165
136166 def list (self ) -> List [Agent ]:
137167 """
@@ -165,43 +195,57 @@ def completion(self, name: str, messages: List[dict]) -> AgentCompletion:
165195 data = self .api .agent_completion (self .project , name , messages )
166196 return AgentCompletion (data ['message' ]['content' ])
167197
168- def add_file (self , name : str , file_path : str , description : str , knowledge_base : str = None ):
def _create_default_knowledge_base(self, agent: Agent, name: str) -> KnowledgeBase:
    """
    Create a knowledge base for an agent, passing along the API keys of the agent's model.

    :param agent: Agent whose model's training options supply the API key parameters
    :param name: Name of the knowledge base to create
    :return: The created knowledge base, returned after its model's wait_complete() call
    """
    embedding_engine = 'langchain_embedding'
    # The default ML engine for embeddings has to exist; create it when the lookup fails.
    try:
        self.ml_engines.get(embedding_engine)
    except AttributeError:
        self.ml_engines.create(embedding_engine, embedding_engine)

    # Only the 'using' entries whose key mentions 'api_key' are handed to the knowledge base.
    model_data = self.models.get(agent.model_name).data
    raw_options = model_data.get('training_options', '{}')
    using_options = json.loads(raw_options).get('using', {})
    api_keys = {}
    for option, value in using_options.items():
        if 'api_key' in option:
            api_keys[option] = value

    knowledge_base = self.knowledge_bases.create(name, params=api_keys)
    # Wait for the underlying embedding model to finish training.
    knowledge_base.model.wait_complete()
    return knowledge_base
213+
214+ def add_files (self , name : str , file_paths : List [str ], description : str , knowledge_base : str = None ):
169215 """
170- Add a file to the agent for retrieval.
216+ Add a list of files to the agent for retrieval.
171217
172218 :param name: Name of the agent
173- :param file_path: Path to the file to be added, or name of existing file .
219+ :param file_paths: List of paths to the files to be added.
174220 :param description: Description of the file. Used by agent to know when to do retrieval
175221 :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
176222 """
177- filename = file_path .split ('/' )[- 1 ]
178- filename_no_extension = filename .split ('.' )[0 ]
179- try :
180- _ = self .api .get_file_metadata (filename_no_extension )
181- except HTTPError as e :
182- if e .response .status_code >= 400 and e .response .status_code != 404 :
183- raise e
184- # Upload file if it doesn't exist.
185- with open (file_path , 'rb' ) as file :
186- content = file .read ()
187- df = pd .DataFrame .from_records ([{'content' : content }])
188- self .api .upload_file (filename_no_extension , df )
189-
190- # Insert uploaded file into new knowledge base.
223+ if not file_paths :
224+ return
225+ filename_no_extension = ''
226+ all_filenames = []
227+ for file_path in file_paths :
228+ filename = file_path .split ('/' )[- 1 ]
229+ filename_no_extension = filename .split ('.' )[0 ]
230+ all_filenames .append (filename_no_extension )
231+ try :
232+ _ = self .api .get_file_metadata (filename_no_extension )
233+ except HTTPError as e :
234+ if e .response .status_code >= 400 and e .response .status_code != 404 :
235+ raise e
236+ # upload file to mindsdb
237+ self .api .upload_file (filename , file_path )
238+
239+ # Insert uploaded files into new knowledge base.
240+ agent = self .get (name )
191241 if knowledge_base is not None :
192242 kb = self .knowledge_bases .get (knowledge_base )
193243 else :
194- kb_name = f'{ name } _{ filename_no_extension } _kb'
195- try :
196- kb = self .knowledge_bases .get (kb_name )
197- except AttributeError as e :
198- # Create KB if it doesn't exist.
199- kb = self .knowledge_bases .create (kb_name )
200- # Wait for underlying embedding model to finish training.
201- kb .model .wait_complete ()
244+ kb_name = f'{ name } _{ filename_no_extension } _{ uuid4 ()} _kb'
245+ kb = self ._create_default_knowledge_base (agent , kb_name )
202246
203247 # Insert the entire file.
204- kb .insert_files ([ filename_no_extension ] )
248+ kb .insert_files (all_filenames )
205249
206250 # Make sure skill name is unique.
207251 skill_name = f'{ filename_no_extension } _retrieval_skill_{ uuid4 ()} '
@@ -210,36 +254,48 @@ def add_file(self, name: str, file_path: str, description: str, knowledge_base:
210254 'description' : description ,
211255 }
212256 file_retrieval_skill = self .skills .create (skill_name , 'retrieval' , retrieval_params )
213- agent = self .get (name )
214257 agent .skills .append (file_retrieval_skill )
215258 self .update (agent .name , agent )
216259
217- def add_webpage (self , name : str , url : str , description : str , knowledge_base : str = None ):
260+
261+ def add_file (self , name : str , file_path : str , description : str , knowledge_base : str = None ):
218262 """
219- Add a webpage to the agent for retrieval.
263+ Add a file to the agent for retrieval.
220264
221265 :param name: Name of the agent
222- :param file_path: URL of the webpage to be added, or name of existing webpage .
223- :param description: Description of the webpage . Used by agent to know when to do retrieval.
266+ :param file_path: Path to the file to be added, or name of existing file .
267+ :param description: Description of the file . Used by agent to know when to do retrieval
224268 :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
225269 """
226- parsed_url = urlparse (url )
227- domain = parsed_url .netloc .replace ('.' , '_' )
228- path = parsed_url .path .replace ('/' , '_' )
270+ self .add_files (name , [file_path ], description , knowledge_base )
271+
272+ def add_webpages (self , name : str , urls : List [str ], description : str , knowledge_base : str = None ):
273+ """
274+ Add a list of webpages to the agent for retrieval.
275+
276+ :param name: Name of the agent
277+ :param urls: List of URLs of the webpages to be added.
278+ :param description: Description of the webpages. Used by agent to know when to do retrieval.
279+ :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
280+ """
281+ if not urls :
282+ return
283+ domain = ''
284+ path = ''
285+ agent = self .get (name )
286+ for url in urls :
287+ # Validate URLs.
288+ parsed_url = urlparse (url )
289+ domain = parsed_url .netloc .replace ('.' , '_' )
290+ path = parsed_url .path .replace ('/' , '_' )
229291 if knowledge_base is not None :
230292 kb = self .knowledge_bases .get (knowledge_base )
231293 else :
232- kb_name = f'{ name } _{ domain } { path } _kb'
233- try :
234- kb = self .knowledge_bases .get (kb_name )
235- except AttributeError :
236- # Create KB if it doesn't exist.
237- kb = self .knowledge_bases .create (kb_name )
238- # Wait for underlying embedding model to finish training.
239- kb .model .wait_complete ()
294+ kb_name = f'{ name } _{ domain } { path } _{ uuid4 ()} _kb'
295+ kb = self ._create_default_knowledge_base (agent , kb_name )
240296
241297 # Insert crawled webpage.
242- kb .insert_webpages ([ url ] )
298+ kb .insert_webpages (urls )
243299
244300 # Make sure skill name is unique.
245301 skill_name = f'{ domain } { path } _retrieval_skill_{ uuid4 ()} '
@@ -248,14 +304,87 @@ def add_webpage(self, name: str, url: str, description: str, knowledge_base: str
248304 'description' : description ,
249305 }
250306 webpage_retrieval_skill = self .skills .create (skill_name , 'retrieval' , retrieval_params )
251- agent = self .get (name )
252307 agent .skills .append (webpage_retrieval_skill )
253308 self .update (agent .name , agent )
254309
def add_webpage(self, name: str, url: str, description: str, knowledge_base: str = None):
    """
    Add a single webpage to the agent for retrieval.

    Wraps the URL in a one-element list and delegates to add_webpages().

    :param name: Name of the agent
    :param url: URL of the webpage to be added.
    :param description: Description of the webpage. Used by agent to know when to do retrieval.
    :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
    """
    single_url = [url]
    self.add_webpages(name, single_url, description, knowledge_base)
320+
def add_database(self, name: str, database: str, tables: List[str], description: str):
    """
    Add a database to the agent for retrieval.

    Checks the requested tables against the database's table list, looks up
    the agent, then creates an 'sql' skill and attaches it to the agent.
    The agent is fetched before the skill is created, so an unknown agent
    name fails without leaving an unattached skill behind.

    :param name: Name of the agent
    :param database: Name of the database to be added.
    :param tables: List of tables to be added.
    :param description: Description of the database. Used by agent to know when to do retrieval.
    :raises ValueError: If a requested table is not among the database's tables.
    """
    # Make sure database exists (the lookup is expected to raise when it does not).
    db = self.databases.get(database)
    # Make sure tables exist.
    known_tables = {table.name for table in db.tables.list()}
    for table in tables:
        if table not in known_tables:
            raise ValueError(f'Table {table} does not exist in database {database}.')

    # Resolve the agent before creating any skill.
    agent = self.get(name)

    # Make sure skill name is unique.
    skill_name = f'{database}_sql_skill_{uuid4()}'
    sql_params = {
        'database': database,
        'tables': tables,
        'description': description,
    }
    database_sql_skill = self.skills.create(skill_name, 'sql', sql_params)
    agent.skills.append(database_sql_skill)
    self.update(agent.name, agent)
349+
def _create_ml_engine_if_not_exists(self, name: str = 'langchain'):
    """
    Make sure an ML engine with the given name exists.

    Looks the engine up by name and, if the lookup raises, creates it with
    the 'langchain' handler. The ``name`` argument is honoured for both the
    lookup and the creation; with the default it targets 'langchain'.

    :param name: Name of the ML engine to look up or create.
    """
    try:
        self.ml_engines.get(name)
    except Exception:
        # Any lookup failure is treated as "engine missing"; create it.
        self.ml_engines.create(name, handler='langchain')
356+
def _create_model_if_not_exists(self, name: str, model: Union[Model, dict]) -> Model:
    """
    Return the model to use for an agent, creating a default one when needed.

    The langchain ML engine is ensured first in every case. Then:

    - ``model`` is None: a model named ``{name}_default_model`` is created from the default parameters.
    - ``model`` is a dict: its entries override the default parameters and the model is created the same way.
    - anything else: ``model`` is returned unchanged.

    :param name: Name of the agent, used as prefix of the created model's name
    :param model: Existing model, dict of parameter overrides, or None
    :return: The given model, or the result of models.create()
    """
    # Create langchain engine if it doesn't exist.
    self._create_ml_engine_if_not_exists()

    if model is not None and not isinstance(model, dict):
        # Caller supplied a ready model object; nothing to create.
        return model

    model_params = {
        'predict': 'answer',
        'mode': 'retrieval',
        'engine': 'langchain',
        'prompt_template': "Answer the user's question in a helpful way: {{question}}",
        # Use GPT-4 by default.
        'provider': 'openai',
        'model_name': 'gpt-4',
    }
    if model is not None:
        # Passed-in params take precedence over the defaults.
        model_params.update(model)
    return self.models.create(f'{name}_default_model', **model_params)
383+
255384 def create (
256385 self ,
257386 name : str ,
258- model : Model ,
387+ model : Union [ Model , dict ] = None ,
259388 skills : List [Union [Skill , str ]] = None ,
260389 params : dict = None ) -> Agent :
261390 """
@@ -280,6 +409,8 @@ def create(
280409 _ = self .skills .create (skill .name , skill .type , skill .params )
281410 skill_names .append (skill .name )
282411
412+ # Create a default model if it doesn't exist.
413+ model = self ._create_model_if_not_exists (name , model )
283414 data = self .api .create_agent (self .project , name , model .name , skill_names , params )
284415 return Agent .from_json (data , self )
285416
0 commit comments