Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion dspy/evaluate/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def _prepare_results_output(
(
merge_dicts(example, prediction) | {metric_name: score}
if prediction_is_dictlike(prediction)
else dict(example) | {"prediction": prediction, metric_name: score}
else example.toDict() | {"prediction": prediction, metric_name: score}
)
for example, prediction, score in results
]
Expand Down Expand Up @@ -305,6 +305,12 @@ def prediction_is_dictlike(prediction):


def merge_dicts(d1, d2) -> dict:
# Convert to dict if objects have toDict method (e.g., Example objects)
if hasattr(d1, "toDict"):
d1 = d1.toDict()
if hasattr(d2, "toDict"):
d2 = d2.toDict()

merged = {}
for k, v in d1.items():
if k in d2:
Expand Down
9 changes: 9 additions & 0 deletions dspy/primitives/example.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
try:
from pydantic import BaseModel
except ImportError:
BaseModel = None


class Example:
"""A flexible data container for DSPy examples and training data.

Expand Down Expand Up @@ -193,6 +199,9 @@ def toDict(self): # noqa: N802
def convert_to_serializable(value):
if hasattr(value, "toDict"):
return value.toDict()
elif BaseModel is not None and isinstance(value, BaseModel):
# Handle Pydantic models (e.g., dspy.History)
return value.model_dump()
elif isinstance(value, list):
return [convert_to_serializable(item) for item in value]
elif isinstance(value, dict):
Expand Down
135 changes: 135 additions & 0 deletions tests/evaluate/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import signal
import tempfile
import threading
from unittest.mock import patch

Expand Down Expand Up @@ -261,3 +263,136 @@ def on_evaluate_end(
def test_evaluation_result_repr():
    """Check the repr string produced for an EvaluationResult holding one result."""
    single_result = (new_example("What is 1+1?", "2"), {"answer": "2"}, 100.0)
    evaluation = EvaluationResult(score=100.0, results=[single_result])

    expected_repr = "EvaluationResult(score=100.0, results=<list of 1 results>)"
    assert repr(evaluation) == expected_repr


def test_evaluate_save_as_json_with_history():
    """Test that save_as_json works with Examples containing dspy.History objects.

    Runs an evaluation over two examples that each carry a ``dspy.History``
    field, then reloads the JSON file named by ``save_as_json`` and checks that
    every history was written as a plain dict with its messages intact.
    """
    import os

    dspy.settings.configure(
        lm=DummyLM(
            {
                "What is 1+1?": {"answer": "2"},
                "What is 2+2?": {"answer": "4"},
            }
        )
    )

    # One history per example; the second holds two messages so that
    # multi-message serialization is covered as well.
    history1 = dspy.History(
        messages=[
            {"question": "Previous Q1", "answer": "Previous A1"},
        ]
    )
    history2 = dspy.History(
        messages=[
            {"question": "Previous Q2", "answer": "Previous A2"},
            {"question": "Previous Q3", "answer": "Previous A3"},
        ]
    )

    devset = [
        dspy.Example(question="What is 1+1?", answer="2", history=history1).with_inputs("question"),
        dspy.Example(question="What is 2+2?", answer="4", history=history2).with_inputs("question"),
    ]

    # Messages expected in the saved file, in devset order.
    expected_messages = [
        [{"question": "Previous Q1", "answer": "Previous A1"}],
        [
            {"question": "Previous Q2", "answer": "Previous A2"},
            {"question": "Previous Q3", "answer": "Previous A3"},
        ],
    ]

    program = Predict("question -> answer")

    # The temporary directory is removed on exit even when an assertion fails,
    # so no manual unlink / finally block is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_json = os.path.join(tmp_dir, "results.json")

        evaluator = Evaluate(
            devset=devset,
            metric=answer_exact_match,
            display_progress=False,
            save_as_json=temp_json,
        )

        result = evaluator(program)
        assert result.score == 100.0

        # Verify JSON file was created and is valid
        with open(temp_json, encoding="utf-8") as f:
            data = json.load(f)

    assert len(data) == 2

    # Verify history was properly serialized in every record
    for record, messages in zip(data, expected_messages):
        assert "history" in record
        assert isinstance(record["history"], dict)
        assert "messages" in record["history"]
        assert len(record["history"]["messages"]) == len(messages)
        assert record["history"]["messages"] == messages


def test_evaluate_save_as_csv_with_history():
    """Test that save_as_csv works with Examples containing dspy.History objects.

    Runs an evaluation over one example carrying a ``dspy.History`` field, then
    reads back the CSV file named by ``save_as_csv`` and checks that a
    ``history`` column is present and mentions the serialized messages.
    """
    import csv
    import os

    dspy.settings.configure(
        lm=DummyLM(
            {
                "What is 1+1?": {"answer": "2"},
            }
        )
    )

    history = dspy.History(
        messages=[
            {"question": "Previous Q", "answer": "Previous A"},
        ]
    )

    devset = [
        dspy.Example(question="What is 1+1?", answer="2", history=history).with_inputs("question"),
    ]

    program = Predict("question -> answer")

    # The temporary directory is removed on exit even when an assertion fails,
    # so no manual unlink / finally block is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_csv = os.path.join(tmp_dir, "results.csv")

        evaluator = Evaluate(
            devset=devset,
            metric=answer_exact_match,
            display_progress=False,
            save_as_csv=temp_csv,
        )

        result = evaluator(program)
        assert result.score == 100.0

        # newline="" lets the csv module handle line endings itself, which
        # matters for quoted fields that contain embedded newlines.
        with open(temp_csv, newline="", encoding="utf-8") as f:
            rows = list(csv.DictReader(f))

    assert len(rows) == 1
    assert "history" in rows[0]
    # CSV will have string representation of the dict
    assert "messages" in rows[0]["history"]

31 changes: 31 additions & 0 deletions tests/primitives/test_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,34 @@ def test_example_copy_without():
def test_example_to_dict():
    """Check that toDict() on an Example built from keyword fields returns those fields."""
    fields = {"a": 1, "b": 2}
    converted = Example(**fields).toDict()
    assert converted == {"a": 1, "b": 2}


def test_example_to_dict_with_history():
    """Check that Example.toDict() converts a dspy.History field into plain, JSON-ready data."""
    import json

    expected_messages = [
        {"question": "What is the capital of France?", "answer": "Paris"},
        {"question": "What is the capital of Germany?", "answer": "Berlin"},
    ]
    # Hand History its own copies so the expected data is independent of it.
    conversation = dspy.History(messages=[dict(message) for message in expected_messages])

    serialized = Example(
        question="Test question",
        history=conversation,
        answer="Test answer",
    ).toDict()

    # The top-level result is a dict that still carries the history field.
    assert isinstance(serialized, dict)
    assert "history" in serialized

    # The history itself must be a dict (not a History object) with its messages.
    history_payload = serialized["history"]
    assert isinstance(history_payload, dict)
    assert "messages" in history_payload
    assert history_payload["messages"] == expected_messages

    # A JSON round trip must succeed and leave the messages unchanged.
    round_tripped = json.loads(json.dumps(serialized))
    assert round_tripped["history"]["messages"] == history_payload["messages"]