Skip to content

Commit 8dc61e4

Browse files
authored
Merge pull request #113 from mindsdb/fix-file-upload
Add functionality to upload files to MindsDB
2 parents ba2eefe + 36ec784 commit 8dc61e4

File tree

4 files changed

+68
-15
lines changed

4 files changed

+68
-15
lines changed

examples/data/tokaido-rulebook.pdf

3.01 MB
Binary file not shown.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
import mindsdb_sdk

con = mindsdb_sdk.connect()

# Look up the agent named 'agent_with_retrieval' and attach the rule book to it.
# NOTE: the PDF path is relative to the current working directory.
agent = con.agents.get('agent_with_retrieval')
agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido')

# Read a single question from standard input and print the agent's reply.
print('Ask a question: ')
question = input()
answer = agent.completion([{'question': question, 'answer': None}])
print(answer.content)

mindsdb_sdk/agents.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,8 @@ def add_files(self, name: str, file_paths: List[str], description: str, knowledg
233233
except HTTPError as e:
234234
if e.response.status_code >= 400 and e.response.status_code != 404:
235235
raise e
236-
# Upload file if it doesn't exist.
237-
with open(file_path, 'rb') as file:
238-
content = file.read()
239-
df = pd.DataFrame.from_records([{'content': content}])
240-
self.api.upload_file(filename_no_extension, df)
236+
# upload file to mindsdb
237+
self.api.upload_file(filename, file_path)
241238

242239
# Insert uploaded files into new knowledge base.
243240
agent = self.get(name)

mindsdb_sdk/connectors/rest_api.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from functools import wraps
2-
from typing import List
2+
from typing import List, Union
33
import io
44

55
import requests
@@ -128,28 +128,72 @@ def objects_tree(self, item=''):
128128

129129
return pd.DataFrame(r.json())
130130

131-
@_try_relogin
132-
def upload_file(self, name: str, df: pd.DataFrame):
133-
134-
# convert to file
@staticmethod
def read_file_as_bytes(file_path: str) -> bytes:
    """
    Read and return content of a file in bytes, given its path.

    :param file_path: Path of the file to read.
    :return: File content in bytes.
    :raises FileNotFoundError: If the file does not exist.
    :raises PermissionError: If the file cannot be opened for reading.
    :raises Exception: For any other failure while opening or reading the file.
    """
    try:
        # Open read-only: 'rb+' would additionally require write access and
        # therefore reject files that are perfectly readable.
        with open(file_path, 'rb') as file:
            return file.read()
    except FileNotFoundError as e:
        raise FileNotFoundError(f'File {file_path} does not exist.') from e
    except PermissionError as e:
        raise PermissionError(f'Permission denied when reading file {file_path}.') from e
    except Exception as e:
        # Keep the original error attached as the cause for easier debugging.
        raise Exception(f'Unknown error occurred when reading file {file_path} - {str(e)}') from e
@staticmethod
def read_dataframe_as_csv(data: pd.DataFrame) -> bytes:
    """
    Read and return content of a DataFrame as CSV in bytes.

    The DataFrame index is not written to the CSV output.

    :param data: DataFrame to read.
    :return: DataFrame content as CSV in bytes.
    """
    fd = io.BytesIO()
    data.to_csv(fd, index=False)
    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek(0) is needed before reading it back.
    return fd.getvalue()
158+
def upload_data(self, file_name: str, data: bytes):
    """
    Upload binary data to MindsDB.

    Sends a PUT request to ``<self.url>/api/files/<name>``, where ``name`` is
    ``file_name`` cut off at its first dot. The original ``file_name`` is sent
    along as a form field and as the name of the uploaded file part.

    :param file_name: Name of the file, with or without an extension.
    :param data: Binary data to upload.
    :raises ValueError: If nothing remains of ``file_name`` once everything
        from the first dot onwards is removed (e.g. ``''`` or ``'.env'``).
    """
    # Keep only the part before the first dot, e.g. 'rules.pdf' -> 'rules'.
    name, _, _ = file_name.partition('.')
    if not name:
        # An empty name would address the bare '/api/files/' collection URL.
        raise ValueError(f'Cannot derive an upload name from file name {file_name!r}.')

    url = self.url + f'/api/files/{name}'
    r = self.session.put(
        url,
        data={
            'original_file_name': file_name,
            'name': name,
            'source_type': 'file',
        },
        files={
            'file': (file_name, data),
        },
    )
    # _raise_for_status is defined elsewhere in this module; presumably it
    # raises when the response carries an HTTP error status.
    _raise_for_status(r)
152182

@_try_relogin
def upload_file(self, name: str, data: Union[pd.DataFrame, str]):
    """
    Upload a file or a DataFrame to MindsDB.

    A DataFrame is serialised to CSV bytes; any other value is treated as a
    path whose content is read from disk. The resulting bytes are handed to
    ``upload_data`` under the given name.

    :param name: Name of the file or DataFrame.
    :param data: DataFrame data or file path.
    """
    payload = (
        self.read_dataframe_as_csv(data)
        if isinstance(data, pd.DataFrame)
        else self.read_file_as_bytes(data)
    )
    self.upload_data(name, payload)
196+
153197
@_try_relogin
154198
def get_file_metadata(self, name: str) -> dict:
155199
# No endpoint currently to get single file.

0 commit comments

Comments
 (0)