Skip to content

Commit c542bb6

Browse files
authored
feat(File): add File type for handling file data (#9014)
* feat(File): add File type for handling file data and implement related methods Signed-off-by: TomuHirata <tomu.hirata@gmail.com> * feat(File): enhance File class to support MIME type detection and data URI representation - Added MIME type detection in `from_path` and `from_bytes` methods. - Updated `__repr__` to display file data as a data URI. - Modified tests to validate new functionality and ensure correct MIME type handling. Signed-off-by: TomuHirata <tomu.hirata@gmail.com> * comments Signed-off-by: TomuHirata <tomu.hirata@gmail.com> * feat(file): enhance file input handling and add API documentation - Updated the File class to include a reference to the OpenAI API specification for file content. - Enhanced the _convert_chat_request_to_responses_request function to support file inputs, including file_data and file_id. - Added comprehensive tests to validate the conversion of various file input formats in the responses API. Signed-off-by: TomuHirata <tomu.hirata@gmail.com> * feat(inspect_history): add file handling to pretty_print_history function - Enhanced the pretty_print_history function to support displaying file input types, including filename, file_id, and data length. - Improved output formatting for better readability of file-related information. Signed-off-by: TomuHirata <tomu.hirata@gmail.com> --------- Signed-off-by: TomuHirata <tomu.hirata@gmail.com>
1 parent ab9d168 commit c542bb6

File tree

8 files changed

+574
-3
lines changed

8 files changed

+574
-3
lines changed

dspy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from dspy.evaluate import Evaluate # isort: skip
88
from dspy.clients import * # isort: skip
9-
from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, History, Type, Tool, ToolCalls, Code # isort: skip
9+
from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, File, History, Type, Tool, ToolCalls, Code # isort: skip
1010
from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging
1111
from dspy.utils.asyncify import asyncify
1212
from dspy.utils.syncify import syncify

dspy/adapters/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dspy.adapters.chat_adapter import ChatAdapter
33
from dspy.adapters.json_adapter import JSONAdapter
44
from dspy.adapters.two_step_adapter import TwoStepAdapter
5-
from dspy.adapters.types import Audio, Code, History, Image, Tool, ToolCalls, Type
5+
from dspy.adapters.types import Audio, Code, File, History, Image, Tool, ToolCalls, Type
66
from dspy.adapters.xml_adapter import XMLAdapter
77

88
__all__ = [
@@ -12,6 +12,7 @@
1212
"History",
1313
"Image",
1414
"Audio",
15+
"File",
1516
"Code",
1617
"JSONAdapter",
1718
"XMLAdapter",

dspy/adapters/types/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from dspy.adapters.types.audio import Audio
22
from dspy.adapters.types.base_type import Type
33
from dspy.adapters.types.code import Code
4+
from dspy.adapters.types.file import File
45
from dspy.adapters.types.history import History
56
from dspy.adapters.types.image import Image
67
from dspy.adapters.types.tool import Tool, ToolCalls
78

8-
__all__ = ["History", "Image", "Audio", "Type", "Tool", "ToolCalls", "Code"]
9+
__all__ = ["History", "Image", "Audio", "File", "Type", "Tool", "ToolCalls", "Code"]

dspy/adapters/types/file.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import base64
2+
import mimetypes
3+
import os
4+
from typing import Any
5+
6+
import pydantic
7+
8+
from dspy.adapters.types.base_type import Type
9+
10+
11+
class File(Type):
    """A file input type for DSPy.

    See https://platform.openai.com/docs/api-reference/chat/create#chat_create-messages-user_message-content-array_of_content_parts-file_content_part-file for specification.

    The file_data field should be a data URI with the format:
        data:<mime_type>;base64,<base64_encoded_data>

    Example:
        ```python
        import dspy

        class QA(dspy.Signature):
            file: dspy.File = dspy.InputField()
            summary = dspy.OutputField()
        program = dspy.Predict(QA)
        result = program(file=dspy.File.from_path("./research.pdf"))
        print(result.summary)
        ```
    """

    # Inline file content; the constructors below always store it as a base64 data URI.
    file_data: str | None = None
    # Identifier of a previously uploaded file.
    file_id: str | None = None
    # Display name of the file.
    filename: str | None = None

    model_config = pydantic.ConfigDict(
        frozen=True,
        str_strip_whitespace=True,
        validate_assignment=True,
        extra="forbid",
    )

    @pydantic.model_validator(mode="before")
    @classmethod
    def validate_input(cls, values: Any) -> Any:
        """Normalize the raw constructor input into a field dict before validation.

        Args:
            values: A `File` instance, a dict of fields, or any input accepted by
                `encode_file_to_dict`.

        Returns:
            A dict with `file_data`, `file_id` and/or `filename` keys.

        Raises:
            ValueError: If `values` is a dict that contains none of the three
                recognized keys (or if `encode_file_to_dict` rejects the input).
        """
        if isinstance(values, cls):
            return {
                "file_data": values.file_data,
                "file_id": values.file_id,
                "filename": values.filename,
            }

        if isinstance(values, dict):
            if "file_data" in values or "file_id" in values or "filename" in values:
                return values
            raise ValueError("Value of `dspy.File` must contain at least one of: file_data, file_id, or filename")

        return encode_file_to_dict(values)

    def format(self) -> list[dict[str, Any]]:
        """Return the file as a one-element list holding a `{"type": "file", ...}` content part.

        Only fields with a truthy value are included in the nested `file` dict.

        Raises:
            ValueError: If building the content part fails; the original exception
                is attached as the cause.
        """
        try:
            file_dict = {}
            if self.file_data:
                file_dict["file_data"] = self.file_data
            if self.file_id:
                file_dict["file_id"] = self.file_id
            if self.filename:
                file_dict["filename"] = self.filename

            return [{"type": "file", "file": file_dict}]
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise ValueError(f"Failed to format file for DSPy: {e}") from e

    def __str__(self):
        # Delegates to `serialize_model`, which is not defined in this class
        # (presumably inherited from `Type` -- confirm against the base class).
        return self.serialize_model()

    def __repr__(self):
        """Return a compact representation that summarizes `file_data` instead of dumping it."""
        parts = []
        if self.file_data is not None:
            if self.file_data.startswith("data:"):
                # A data URI looks like "data:<mime>[;param...],<payload>". Cut the header
                # at the first comma before splitting on ";" so that a URI without any
                # ";" parameter (e.g. "data:text/plain,hello") does not leak its payload
                # into the reported MIME type.
                header = self.file_data.split(",", 1)[0]
                mime_type = header[len("data:"):].split(";", 1)[0]
                len_data = len(self.file_data.split("base64,")[1]) if "base64," in self.file_data else len(self.file_data)
                parts.append(f"file_data=<DATA_URI({mime_type}, {len_data} chars)>")
            else:
                len_data = len(self.file_data)
                parts.append(f"file_data=<DATA({len_data} chars)>")
        if self.file_id is not None:
            parts.append(f"file_id='{self.file_id}'")
        if self.filename is not None:
            parts.append(f"filename='{self.filename}'")
        return f"File({', '.join(parts)})"

    @classmethod
    def from_path(cls, file_path: str, filename: str | None = None, mime_type: str | None = None) -> "File":
        """Create a File from a local file path.

        Args:
            file_path: Path to the file to read
            filename: Optional filename to use (defaults to basename of path)
            mime_type: Optional MIME type (defaults to auto-detection from file extension)

        Returns:
            A File whose `file_data` is a base64 data URI of the file's bytes.

        Raises:
            ValueError: If `file_path` does not point to an existing regular file.
        """
        if not os.path.isfile(file_path):
            raise ValueError(f"File not found: {file_path}")

        with open(file_path, "rb") as f:
            file_bytes = f.read()

        if filename is None:
            filename = os.path.basename(file_path)

        if mime_type is None:
            mime_type, _ = mimetypes.guess_type(file_path)
            if mime_type is None:
                # Unknown extension: fall back to the generic binary type.
                mime_type = "application/octet-stream"

        encoded_data = base64.b64encode(file_bytes).decode("utf-8")
        file_data = f"data:{mime_type};base64,{encoded_data}"

        return cls(file_data=file_data, filename=filename)

    @classmethod
    def from_bytes(
        cls, file_bytes: bytes, filename: str | None = None, mime_type: str = "application/octet-stream"
    ) -> "File":
        """Create a File from raw bytes.

        Args:
            file_bytes: Raw bytes of the file
            filename: Optional filename
            mime_type: MIME type (defaults to 'application/octet-stream')

        Returns:
            A File whose `file_data` is a base64 data URI of `file_bytes`.
        """
        encoded_data = base64.b64encode(file_bytes).decode("utf-8")
        file_data = f"data:{mime_type};base64,{encoded_data}"
        return cls(file_data=file_data, filename=filename)

    @classmethod
    def from_file_id(cls, file_id: str, filename: str | None = None) -> "File":
        """Create a File from an uploaded file ID."""
        return cls(file_id=file_id, filename=filename)
140+
141+
142+
def encode_file_to_dict(file_input: Any) -> dict:
    """
    Turn a supported file input into a dict of `File` fields.

    Args:
        file_input: A File instance, a string naming an existing file, or raw bytes.

    Returns:
        dict: Holds some of the keys file_data, file_id and filename, depending on
            the kind of input.

    Raises:
        ValueError: If a string input is not an existing file path, or if the input
            is of any other unsupported type.
    """
    # File instance: copy over only the fields that are set, in declaration order.
    if isinstance(file_input, File):
        field_pairs = (
            ("file_data", file_input.file_data),
            ("file_id", file_input.file_id),
            ("filename", file_input.filename),
        )
        return {key: value for key, value in field_pairs if value is not None}

    # String: must name an existing file on disk.
    if isinstance(file_input, str):
        if not os.path.isfile(file_input):
            raise ValueError(f"Unrecognized file string: {file_input}; must be a valid file path")
        loaded = File.from_path(file_input)
        return {"file_data": loaded.file_data, "filename": loaded.filename}

    # Raw bytes: encoded with the default MIME type, no filename available.
    if isinstance(file_input, bytes):
        return {"file_data": File.from_bytes(file_input).file_data}

    raise ValueError(f"Unsupported file input type: {type(file_input)}")

dspy/clients/lm.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,11 @@ async def alitellm_responses_completion(request: dict[str, Any], num_retries: in
476476

477477

478478
def _convert_chat_request_to_responses_request(request: dict[str, Any]):
479+
"""
480+
Convert a chat request to a responses request
481+
See https://platform.openai.com/docs/api-reference/responses/create for the responses API specification.
482+
Also see https://platform.openai.com/docs/api-reference/chat/create for the chat API specification.
483+
"""
479484
request = dict(request)
480485
if "messages" in request:
481486
content_blocks = []
@@ -525,6 +530,14 @@ def _convert_content_item_to_responses_format(item: dict[str, Any]) -> dict[str,
525530
"type": "input_text",
526531
"text": item.get("text", ""),
527532
}
533+
elif item.get("type") == "file":
534+
file = item.get("file", {})
535+
return {
536+
"type": "input_file",
537+
"file_data": file.get("file_data"),
538+
"filename": file.get("filename"),
539+
"file_id": file.get("file_id"),
540+
}
528541

529542
# For other items, return as-is
530543
return item

dspy/utils/inspect_history.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ def pretty_print_history(history, n: int = 1):
4646
len_audio = len(c["input_audio"]["data"])
4747
audio_str = f"<audio format='{audio_format}' base64-encoded, length={len_audio}>"
4848
print(_blue(audio_str.strip()))
49+
elif c["type"] == "file" or c["type"] == "input_file":
50+
file = c.get("file", c.get("input_file", {}))
51+
filename = file.get("filename", "")
52+
file_id = file.get("file_id", "")
53+
file_data = file.get("file_data", "")
54+
file_str = f"<file: name:{filename}, id:{file_id}, data_length:{len(file_data)}>"
55+
print(_blue(file_str.strip()))
4956
print("\n")
5057

5158
if isinstance(outputs[0], dict):

tests/clients/test_lm.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,103 @@ def test_responses_api_converts_images_correctly():
716716
assert content[0]["image_url"] == "https://example.com/image.jpg"
717717

718718

719+
def test_responses_api_converts_files_correctly():
    """Check that chat-style `file` content parts are converted to `input_file` items."""
    from dspy.clients.lm import _convert_chat_request_to_responses_request

    def convert_user_content(parts):
        # Wrap the content parts in a single user message and run the conversion.
        chat_request = {
            "model": "openai/gpt-5-mini",
            "messages": [{"role": "user", "content": parts}],
        }
        return _convert_chat_request_to_responses_request(chat_request)

    # Case 1: inline base64 file data alongside a text part.
    converted = convert_user_content(
        [
            {"type": "text", "text": "Analyze this file"},
            {
                "type": "file",
                "file": {
                    "file_data": "data:text/plain;base64,SGVsbG8gV29ybGQ=",
                    "filename": "test.txt",
                },
            },
        ]
    )

    assert "input" in converted
    assert len(converted["input"]) == 1
    assert converted["input"][0]["role"] == "user"

    items = converted["input"][0]["content"]
    assert len(items) == 2

    # The text part becomes an input_text item.
    assert items[0]["type"] == "input_text"
    assert items[0]["text"] == "Analyze this file"

    # The file part becomes an input_file item with its fields flattened.
    assert items[1]["type"] == "input_file"
    assert items[1]["file_data"] == "data:text/plain;base64,SGVsbG8gV29ybGQ="
    assert items[1]["filename"] == "test.txt"

    # Case 2: a file referenced by file_id.
    converted = convert_user_content(
        [
            {
                "type": "file",
                "file": {
                    "file_id": "file-abc123",
                    "filename": "document.pdf",
                },
            },
        ]
    )

    items = converted["input"][0]["content"]
    assert len(items) == 1
    assert items[0]["type"] == "input_file"
    assert items[0]["file_id"] == "file-abc123"
    assert items[0]["filename"] == "document.pdf"

    # Case 3: every file field supplied at once.
    converted = convert_user_content(
        [
            {
                "type": "file",
                "file": {
                    "file_data": "data:application/pdf;base64,JVBERi0xLjQ=",
                    "file_id": "file-xyz789",
                    "filename": "report.pdf",
                },
            },
        ]
    )

    items = converted["input"][0]["content"]
    assert items[0]["type"] == "input_file"
    assert items[0]["file_data"] == "data:application/pdf;base64,JVBERi0xLjQ="
    assert items[0]["file_id"] == "file-xyz789"
    assert items[0]["filename"] == "report.pdf"
814+
815+
719816
def test_responses_api_with_image_input():
720817
api_response = make_response(
721818
output_blocks=[

0 commit comments

Comments
 (0)