Skip to content

Commit 36ec784

Browse files
committed
Add functionality to upload files to MindsDB
This commit extends the upload functionality of MindsDB by allowing users to upload files using its SDK. The changes include updating the rest_api connector to handle file reading and uploading, modifying the agents to use this new functionality, and providing a new example script to illustrate this use case.
1 parent ba2eefe commit 36ec784

File tree

4 files changed

+68
-15
lines changed

4 files changed

+68
-15
lines changed

examples/data/tokaido-rulebook.pdf

3.01 MB
Binary file not shown.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import mindsdb_sdk

con = mindsdb_sdk.connect()

# Look up the existing agent and attach the rule book PDF to it.
agent = con.agents.get('agent_with_retrieval')
# NOTE: the path is relative to the current working directory.
agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido')

# Read one question from stdin and print the agent's answer.
print('Ask a question: ')
question = input()
answer = agent.completion([{'question': question, 'answer': None}])
print(answer.content)

mindsdb_sdk/agents.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,8 @@ def add_files(self, name: str, file_paths: List[str], description: str, knowledg
233233
except HTTPError as e:
234234
if e.response.status_code >= 400 and e.response.status_code != 404:
235235
raise e
236-
# Upload file if it doesn't exist.
237-
with open(file_path, 'rb') as file:
238-
content = file.read()
239-
df = pd.DataFrame.from_records([{'content': content}])
240-
self.api.upload_file(filename_no_extension, df)
236+
# upload file to mindsdb
237+
self.api.upload_file(filename, file_path)
241238

242239
# Insert uploaded files into new knowledge base.
243240
agent = self.get(name)

mindsdb_sdk/connectors/rest_api.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from functools import wraps
2-
from typing import List
2+
from typing import List, Union
33
import io
44

55
import requests
@@ -128,28 +128,72 @@ def objects_tree(self, item=''):
128128

129129
return pd.DataFrame(r.json())
130130

131-
@_try_relogin
132-
def upload_file(self, name: str, df: pd.DataFrame):
133-
134-
# convert to file
131+
@staticmethod
def read_file_as_bytes(file_path: str) -> bytes:
    """
    Read and return content of a file in bytes, given its path.

    :param file_path: Path of the file to read.
    :return: File content in bytes.
    :raises FileNotFoundError: If the file does not exist.
    :raises PermissionError: If the file cannot be opened for reading.
    :raises Exception: For any other failure while opening or reading the file.
    """
    try:
        # Plain read-only binary mode: 'rb+' would also require write access
        # and reject read-only files even though nothing is written.
        with open(file_path, 'rb') as file:
            return file.read()
    except FileNotFoundError as e:
        # Same message as before; the subclass is still caught by `except Exception`.
        raise FileNotFoundError(f'File {file_path} does not exist.') from e
    except PermissionError as e:
        raise PermissionError(f'Permission denied when reading file {file_path}.') from e
    except Exception as e:
        raise Exception(f'Unknown error occurred when reading file {file_path} - {str(e)}') from e
147+
@staticmethod
def read_dataframe_as_csv(data: pd.DataFrame):
    """
    Serialize a DataFrame to CSV and hand back the raw bytes.

    :param data: DataFrame to serialize; its index is not written.
    :return: CSV representation of the DataFrame in bytes.
    """
    buffer = io.BytesIO()
    data.to_csv(buffer, index=False)
    # getvalue() yields the full buffer regardless of the stream position.
    return buffer.getvalue()
158+
159+
def upload_data(self, file_name: str, data: bytes):
    """
    Send binary data to the MindsDB files endpoint with an HTTP PUT.

    :param file_name: Name of the file; everything from the first '.' onward
        is dropped to form the name used in the URL and form fields.
    :param data: Binary data to upload.
    """
    # Keep only the part before the first dot as the stored name.
    name, _, _ = file_name.partition('.')

    form_fields = {
        'original_file_name': file_name,
        'name': name,
        'source_type': 'file',
    }
    attachment = {'file': (file_name, data)}

    response = self.session.put(
        self.url + f'/api/files/{name}',
        data=form_fields,
        files=attachment,
    )
    # Response validation is delegated to the module's status helper.
    _raise_for_status(response)
152182

183+
@_try_relogin
def upload_file(self, name: str, data: Union[pd.DataFrame, str]):
    """
    Upload a DataFrame or an on-disk file to MindsDB.

    A DataFrame is serialized to CSV bytes; any other value is treated as a
    file path and read as raw bytes.

    :param name: Name of the file or DataFrame.
    :param data: DataFrame data or file path.
    """
    payload = (
        self.read_dataframe_as_csv(data)
        if isinstance(data, pd.DataFrame)
        else self.read_file_as_bytes(data)
    )
    self.upload_data(name, payload)
196+
153197
@_try_relogin
154198
def get_file_metadata(self, name: str) -> dict:
155199
# No endpoint currently to get single file.

0 commit comments

Comments
 (0)