Skip to content

Commit d929d90

Browse files
authored
Merge pull request #112 from mindsdb/agents-web
Agents UX Improvements
2 parents dd2684a + 8dc61e4 commit d929d90

File tree

7 files changed

+331
-80
lines changed

7 files changed

+331
-80
lines changed

examples/data/tokaido-rulebook.pdf

3.01 MB
Binary file not shown.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import mindsdb_sdk

# Connect using the SDK's default connection settings.
con = mindsdb_sdk.connect()

# Look up the agent named 'agent_with_retrieval'. This script does not create
# it, so the lookup is expected to find an agent that was set up beforehand.
agent = con.agents.get('agent_with_retrieval')

# Attach the rulebook PDF to the agent. The description is what the agent uses
# to decide when to consult the file, so the game name must be spelled correctly.
agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game tokaido')

# Read one question from stdin and print the agent's answer.
print('Ask a question: ')
question = input()
answer = agent.completion([{'question': question, 'answer': None}])
print(answer.content)

mindsdb_sdk/agents.py

Lines changed: 179 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
from urllib.parse import urlparse
44
from uuid import uuid4
55
import datetime
6+
import json
67
import pandas as pd
78

89
from mindsdb_sdk.databases import Databases
9-
from mindsdb_sdk.knowledge_bases import KnowledgeBases
10-
from mindsdb_sdk.models import Model
10+
from mindsdb_sdk.knowledge_bases import KnowledgeBase, KnowledgeBases
11+
from mindsdb_sdk.ml_engines import MLEngines
12+
from mindsdb_sdk.models import Model, Models
1113
from mindsdb_sdk.skills import Skill, Skills
1214
from mindsdb_sdk.utils.objects_collection import CollectionBase
1315

@@ -79,6 +81,14 @@ def __init__(
7981
def completion(self, messages: List[dict]) -> AgentCompletion:
    """
    Request a completion for this agent through its owning collection.

    :param messages: List of message dicts, forwarded unchanged.

    :return: The value returned by the collection's ``completion`` call.
    """
    owner = self.collection
    return owner.completion(self.name, messages)
8183

84+
def add_files(self, file_paths: List[str], description: str, knowledge_base: str = None):
    """
    Add several files to this agent for retrieval.

    Delegates to the owning collection's ``add_files`` using this agent's name.

    :param file_paths: List of paths to the files to be added.
    :param description: Description passed through to the collection together with the files.
    :param knowledge_base: Optional knowledge base name, passed through unchanged (default ``None``).
    """
    owner = self.collection
    owner.add_files(self.name, file_paths, description, knowledge_base)
91+
8292
def add_file(self, file_path: str, description: str, knowledge_base: str = None):
8393
"""
8494
Add a file to the agent for retrieval.
@@ -87,6 +97,14 @@ def add_file(self, file_path: str, description: str, knowledge_base: str = None)
8797
"""
8898
self.collection.add_file(self.name, file_path, description, knowledge_base)
8999

100+
def add_webpages(self, urls: List[str], description: str, knowledge_base: str = None):
    """
    Add several crawled URLs to this agent for retrieval.

    Delegates to the owning collection's ``add_webpages`` using this agent's name.

    :param urls: List of URLs to be crawled and added.
    :param description: Description passed through to the collection together with the URLs.
    :param knowledge_base: Optional knowledge base name, passed through unchanged (default ``None``).
    """
    owner = self.collection
    owner.add_webpages(self.name, urls, description, knowledge_base)
107+
90108
def add_webpage(self, url: str, description: str, knowledge_base: str = None):
91109
"""
92110
Add a crawled URL to the agent for retrieval.
@@ -95,6 +113,16 @@ def add_webpage(self, url: str, description: str, knowledge_base: str = None):
95113
"""
96114
self.collection.add_webpage(self.name, url, description, knowledge_base)
97115

116+
def add_database(self, database: str, tables: List[str], description: str):
    """
    Add a database to this agent for retrieval.

    Delegates to the owning collection's ``add_database`` using this agent's name.

    :param database: Name of the database to be added.
    :param tables: List of tables to be added.
    :param description: Description of the database tables. Used by the agent to know when to use SQL skill.
    """
    owner = self.collection
    owner.add_database(self.name, database, tables, description)
125+
98126
def __repr__(self):
    """Return ``<ClassName>(name: <agent name>)`` for debugging output."""
    class_name = self.__class__.__name__
    return f'{class_name}(name: {self.name})'
100128

@@ -126,12 +154,14 @@ def from_json(cls, json: dict, collection: CollectionBase):
126154

127155
class Agents(CollectionBase):
128156
"""Collection for agents"""
129-
def __init__(self, api, project: str, knowledge_bases: KnowledgeBases, databases: Databases, skills: Skills = None):
157+
def __init__(self, api, project: str, knowledge_bases: KnowledgeBases, databases: Databases, models: Models, ml_engines: MLEngines, skills: Skills = None):
    """
    Store the API handle, project name and the sibling collections used by agents.

    :param api: API object used for agent requests.
    :param project: Name of the project the agents belong to.
    :param knowledge_bases: Knowledge base collection.
    :param databases: Database collection.
    :param models: Model collection.
    :param ml_engines: ML engine collection.
    :param skills: Skill collection; when falsy, a new ``Skills(api, project)`` is built.
    """
    self.api = api
    self.project = project
    # Fall back to a freshly built skills collection when none was supplied.
    if not skills:
        skills = Skills(self.api, project)
    self.skills = skills
    self.databases = databases
    self.knowledge_bases = knowledge_bases
    self.ml_engines = ml_engines
    self.models = models
135165

136166
def list(self) -> List[Agent]:
137167
"""
@@ -165,43 +195,57 @@ def completion(self, name: str, messages: List[dict]) -> AgentCompletion:
165195
data = self.api.agent_completion(self.project, name, messages)
166196
return AgentCompletion(data['message']['content'])
167197

168-
def add_file(self, name: str, file_path: str, description: str, knowledge_base: str = None):
198+
def _create_default_knowledge_base(self, agent: Agent, name: str) -> KnowledgeBase:
    """
    Create a knowledge base called ``name`` for the given agent.

    :param agent: Agent whose model's training options supply the API key parameters.
    :param name: Name of the knowledge base to create.

    :return: The newly created knowledge base, after ``wait_complete()`` was called on its model.
    """
    embedding_engine = 'langchain_embedding'
    # The default embeddings engine is created when the lookup raises AttributeError.
    try:
        self.ml_engines.get(embedding_engine)
    except AttributeError:
        self.ml_engines.create(embedding_engine, embedding_engine)

    # Forward every 'using' option whose key mentions 'api_key' to the knowledge base.
    model_record = self.models.get(agent.model_name)
    raw_options = model_record.data.get('training_options', '{}')
    using_options = json.loads(raw_options).get('using', {})
    api_key_params = {}
    for option_name, option_value in using_options.items():
        if 'api_key' in option_name:
            api_key_params[option_name] = option_value

    knowledge_base = self.knowledge_bases.create(name, params=api_key_params)
    # Block until the knowledge base's underlying model reports completion.
    knowledge_base.model.wait_complete()
    return knowledge_base
213+
214+
def add_files(self, name: str, file_paths: List[str], description: str, knowledge_base: str = None):
169215
"""
170-
Add a file to the agent for retrieval.
216+
Add a list of files to the agent for retrieval.
171217
172218
:param name: Name of the agent
173-
:param file_path: Path to the file to be added, or name of existing file.
219+
:param file_paths: List of paths to the files to be added.
174220
:param description: Description of the file. Used by agent to know when to do retrieval
175221
:param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
176222
"""
177-
filename = file_path.split('/')[-1]
178-
filename_no_extension = filename.split('.')[0]
179-
try:
180-
_ = self.api.get_file_metadata(filename_no_extension)
181-
except HTTPError as e:
182-
if e.response.status_code >= 400 and e.response.status_code != 404:
183-
raise e
184-
# Upload file if it doesn't exist.
185-
with open(file_path, 'rb') as file:
186-
content = file.read()
187-
df = pd.DataFrame.from_records([{'content': content}])
188-
self.api.upload_file(filename_no_extension, df)
189-
190-
# Insert uploaded file into new knowledge base.
223+
if not file_paths:
224+
return
225+
filename_no_extension = ''
226+
all_filenames = []
227+
for file_path in file_paths:
228+
filename = file_path.split('/')[-1]
229+
filename_no_extension = filename.split('.')[0]
230+
all_filenames.append(filename_no_extension)
231+
try:
232+
_ = self.api.get_file_metadata(filename_no_extension)
233+
except HTTPError as e:
234+
if e.response.status_code >= 400 and e.response.status_code != 404:
235+
raise e
236+
# upload file to mindsdb
237+
self.api.upload_file(filename, file_path)
238+
239+
# Insert uploaded files into new knowledge base.
240+
agent = self.get(name)
191241
if knowledge_base is not None:
192242
kb = self.knowledge_bases.get(knowledge_base)
193243
else:
194-
kb_name = f'{name}_{filename_no_extension}_kb'
195-
try:
196-
kb = self.knowledge_bases.get(kb_name)
197-
except AttributeError as e:
198-
# Create KB if it doesn't exist.
199-
kb = self.knowledge_bases.create(kb_name)
200-
# Wait for underlying embedding model to finish training.
201-
kb.model.wait_complete()
244+
kb_name = f'{name}_{filename_no_extension}_{uuid4()}_kb'
245+
kb = self._create_default_knowledge_base(agent, kb_name)
202246

203247
# Insert the entire file.
204-
kb.insert_files([filename_no_extension])
248+
kb.insert_files(all_filenames)
205249

206250
# Make sure skill name is unique.
207251
skill_name = f'{filename_no_extension}_retrieval_skill_{uuid4()}'
@@ -210,36 +254,48 @@ def add_file(self, name: str, file_path: str, description: str, knowledge_base:
210254
'description': description,
211255
}
212256
file_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params)
213-
agent = self.get(name)
214257
agent.skills.append(file_retrieval_skill)
215258
self.update(agent.name, agent)
216259

217-
def add_webpage(self, name: str, url: str, description: str, knowledge_base: str = None):
260+
261+
def add_file(self, name: str, file_path: str, description: str, knowledge_base: str = None):
218262
"""
219-
Add a webpage to the agent for retrieval.
263+
Add a file to the agent for retrieval.
220264
221265
:param name: Name of the agent
222-
:param file_path: URL of the webpage to be added, or name of existing webpage.
223-
:param description: Description of the webpage. Used by agent to know when to do retrieval.
266+
:param file_path: Path to the file to be added, or name of existing file.
267+
:param description: Description of the file. Used by agent to know when to do retrieval
224268
:param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
225269
"""
226-
parsed_url = urlparse(url)
227-
domain = parsed_url.netloc.replace('.', '_')
228-
path = parsed_url.path.replace('/', '_')
270+
self.add_files(name, [file_path], description, knowledge_base)
271+
272+
def add_webpages(self, name: str, urls: List[str], description: str, knowledge_base: str = None):
273+
"""
274+
Add a list of webpages to the agent for retrieval.
275+
276+
:param name: Name of the agent
277+
:param urls: List of URLs of the webpages to be added.
278+
:param description: Description of the webpages. Used by agent to know when to do retrieval.
279+
:param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
280+
"""
281+
if not urls:
282+
return
283+
domain = ''
284+
path = ''
285+
agent = self.get(name)
286+
for url in urls:
287+
# Validate URLs.
288+
parsed_url = urlparse(url)
289+
domain = parsed_url.netloc.replace('.', '_')
290+
path = parsed_url.path.replace('/', '_')
229291
if knowledge_base is not None:
230292
kb = self.knowledge_bases.get(knowledge_base)
231293
else:
232-
kb_name = f'{name}_{domain}{path}_kb'
233-
try:
234-
kb = self.knowledge_bases.get(kb_name)
235-
except AttributeError:
236-
# Create KB if it doesn't exist.
237-
kb = self.knowledge_bases.create(kb_name)
238-
# Wait for underlying embedding model to finish training.
239-
kb.model.wait_complete()
294+
kb_name = f'{name}_{domain}{path}_{uuid4()}_kb'
295+
kb = self._create_default_knowledge_base(agent, kb_name)
240296

241297
# Insert crawled webpage.
242-
kb.insert_webpages([url])
298+
kb.insert_webpages(urls)
243299

244300
# Make sure skill name is unique.
245301
skill_name = f'{domain}{path}_retrieval_skill_{uuid4()}'
@@ -248,14 +304,87 @@ def add_webpage(self, name: str, url: str, description: str, knowledge_base: str
248304
'description': description,
249305
}
250306
webpage_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params)
251-
agent = self.get(name)
252307
agent.skills.append(webpage_retrieval_skill)
253308
self.update(agent.name, agent)
254309

310+
def add_webpage(self, name: str, url: str, description: str, knowledge_base: str = None):
    """
    Add a single webpage to the agent for retrieval.

    Wraps ``url`` in a one-element list and delegates to ``add_webpages``.

    :param name: Name of the agent
    :param url: URL of the webpage to be added.
    :param description: Description of the webpage. Used by agent to know when to do retrieval.
    :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given.
    """
    single_url = [url]
    self.add_webpages(name, single_url, description, knowledge_base)
320+
321+
def add_database(self, name: str, database: str, tables: List[str], description: str):
    """
    Add a database to the agent for retrieval.

    Creates a uniquely named 'sql' skill for the database and appends it to the agent.

    :param name: Name of the agent
    :param database: Name of the database to be added.
    :param tables: List of tables to be added.
    :param description: Description of the database. Used by agent to know when to do retrieval.

    :raises ValueError: if any entry of ``tables`` is not among the database's tables.
    """
    # Make sure database exists.
    db = self.databases.get(database)
    # Make sure tables exist.
    existing_tables = {t.name for t in db.tables.list()}
    for table in tables:
        if table not in existing_tables:
            raise ValueError(f'Table {table} does not exist in database {database}.')

    # Resolve the agent BEFORE creating the skill, so that a failed agent lookup
    # does not leave behind a skill that is attached to nothing.
    agent = self.get(name)

    # Make sure skill name is unique.
    skill_name = f'{database}_sql_skill_{uuid4()}'
    sql_params = {
        'database': database,
        'tables': tables,
        'description': description,
    }
    database_sql_skill = self.skills.create(skill_name, 'sql', sql_params)
    agent.skills.append(database_sql_skill)
    self.update(agent.name, agent)
349+
350+
def _create_ml_engine_if_not_exists(self, name: str = 'langchain'):
    """
    Ensure an ML engine called ``name`` exists, creating it with the 'langchain' handler if the lookup fails.

    :param name: Name of the ML engine to look up and, if needed, create. Defaults to 'langchain'.
    """
    try:
        # Honour the requested engine name instead of a hard-coded 'langchain'.
        self.ml_engines.get(name)
    except Exception:
        # Any lookup failure is treated as "engine missing" (best effort, as before).
        self.ml_engines.create(name, handler='langchain')
356+
357+
def _create_model_if_not_exists(self, name: str, model: Union[Model, dict] = None) -> Model:
    """
    Resolve the model an agent should use, creating ``<name>_default_model`` when needed.

    The langchain ML engine is ensured first in every case.

    :param name: Agent name, used as the prefix of the created model's name.
    :param model: ``None`` to create a model from the default parameters, a dict of
        parameters overriding those defaults, or an existing model object, which is returned as is.

    :return: The given model object, or the result of ``self.models.create``.
    """
    # Create langchain engine if it doesn't exist.
    self._create_ml_engine_if_not_exists()

    # Anything that is neither None nor a dict is taken to be a ready-made model.
    if model is not None and not isinstance(model, dict):
        return model

    model_params = {
        'predict': 'answer',
        'mode': 'retrieval',
        'engine': 'langchain',
        'prompt_template': 'Answer the user"s question in a helpful way: {{question}}',
        # Use GPT-4 by default.
        'provider': 'openai',
        'model_name': 'gpt-4'
    }
    if model is not None:
        # Caller-supplied parameters take precedence over the defaults.
        model_params.update(model)
    return self.models.create(f'{name}_default_model', **model_params)
383+
255384
def create(
256385
self,
257386
name: str,
258-
model: Model,
387+
model: Union[Model, dict] = None,
259388
skills: List[Union[Skill, str]] = None,
260389
params: dict = None) -> Agent:
261390
"""
@@ -280,6 +409,8 @@ def create(
280409
_ = self.skills.create(skill.name, skill.type, skill.params)
281410
skill_names.append(skill.name)
282411

412+
# Create a default model if it doesn't exist.
413+
model = self._create_model_if_not_exists(name, model)
283414
data = self.api.create_agent(self.project, name, model.name, skill_names, params)
284415
return Agent.from_json(data, self)
285416

0 commit comments

Comments
 (0)