Skip to content

Commit f783fcf

Browse files
Add integration test for image file viewing with FileEditor
This test verifies that the FileEditor can view and display image files by having an agent analyze the OpenHands logo and identify its colors.

The test:
1. Downloads the OpenHands logo to the workspace.
2. Asks the agent to view the logo.png file and identify its colors.
3. Verifies that the agent correctly identifies yellow as one of the colors.

This test requires a vision-capable LLM.

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 3f908b9 commit f783fcf

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Test that an agent can view and analyze image files using FileEditor."""
2+
3+
import os
4+
import urllib.request
5+
6+
from openhands.sdk import TextContent, get_logger
7+
from openhands.sdk.event.llm_convertible import MessageEvent
8+
from openhands.sdk.tool import Tool, register_tool
9+
from openhands.tools.file_editor import FileEditorTool
10+
from openhands.tools.terminal import TerminalTool
11+
from tests.integration.base import BaseIntegrationTest, TestResult
12+
13+
14+
# Prompt for the agent; ImageFileViewingTest exposes it as its INSTRUCTION
# class attribute. It names three candidate colors, and verify_result() only
# looks for the word "yellow" in the agent's replies.
INSTRUCTION = (
    "Please view the logo.png file in the current directory and tell me what "
    "colors you see in it. Is the logo blue, yellow, or green? Please analyze "
    "the image and provide your answer."
)

# Image fetched by ImageFileViewingTest.setup() and saved as logo.png.
# NOTE(review): the URL tracks the `main` branch rather than a pinned commit,
# so the image (and therefore the expected color) could change upstream.
IMAGE_URL = "https://github.com/OpenHands/docs/raw/main/openhands/static/img/logo.png"
21+
22+
# Module-level logger obtained from the SDK's get_logger with this module's
# name; setup() uses it to report whether the logo download succeeded.
logger = get_logger(__name__)
23+
24+
25+
class ImageFileViewingTest(BaseIntegrationTest):
    """Test that an agent can view and analyze image files.

    ``setup`` downloads the image at ``IMAGE_URL`` into the workspace as
    ``logo.png``. ``verify_result`` then passes when any assistant text
    message collected during the run contains the word "yellow".
    """

    INSTRUCTION: str = INSTRUCTION

    # Upper bound (seconds) on the logo download so that a stalled connection
    # fails the setup step instead of hanging the whole run.
    DOWNLOAD_TIMEOUT_SECONDS: float = 30.0

    def __init__(self, *args, **kwargs):
        """Initialize the test and reject LLMs without vision support.

        Args:
            *args: Forwarded unchanged to ``BaseIntegrationTest.__init__``.
            **kwargs: Forwarded unchanged to ``BaseIntegrationTest.__init__``.

        Raises:
            ValueError: If ``self.llm.vision_is_active()`` returns a falsy
                value.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): self.workspace and self.llm are presumably populated
        # by the base-class initializer; it is not visible from this file.
        self.logo_path: str = os.path.join(self.workspace, "logo.png")

        # Verify that the LLM supports vision
        if not self.llm.vision_is_active():
            raise ValueError(
                "This test requires a vision-capable LLM model. "
                "Please use a model that supports image input."
            )

    @property
    def tools(self) -> list[Tool]:
        """List of tools available to the agent.

        Registers ``TerminalTool`` and ``FileEditorTool`` under those names on
        every access and returns a ``Tool`` reference for each.
        """
        register_tool("TerminalTool", TerminalTool)
        register_tool("FileEditorTool", FileEditorTool)
        return [
            Tool(name="TerminalTool"),
            Tool(name="FileEditorTool"),
        ]

    def setup(self) -> None:
        """Download the OpenHands logo for the agent to analyze.

        The image is read completely into memory before anything is written,
        so a failed or interrupted transfer does not leave a truncated
        ``logo.png`` behind for ``verify_result`` to find.

        Raises:
            ValueError: If the server returns an empty body.
            Exception: Any error raised while fetching or writing the file is
                logged and re-raised unchanged.
        """
        try:
            # urlopen (unlike the legacy urlretrieve) accepts a timeout.
            with urllib.request.urlopen(
                IMAGE_URL, timeout=self.DOWNLOAD_TIMEOUT_SECONDS
            ) as response:
                image_bytes = response.read()

            if not image_bytes:
                raise ValueError(f"Downloaded logo from {IMAGE_URL} is empty")

            with open(self.logo_path, "wb") as logo_file:
                logo_file.write(image_bytes)

            logger.info(f"Downloaded test logo to: {self.logo_path}")
        except Exception as e:
            logger.error(f"Failed to download logo: {e}")
            raise

    def verify_result(self) -> TestResult:
        """Verify that the agent identified yellow as one of the logo colors.

        Returns:
            A failing ``TestResult`` if ``logo.png`` no longer exists or if no
            assistant text mentions "yellow"; a passing one otherwise. The
            failure reason includes the first 500 characters of the combined,
            lower-cased assistant text.
        """
        if not os.path.exists(self.logo_path):
            return TestResult(
                success=False, reason="Logo file not found after agent execution"
            )

        # Collect the lower-cased text of every assistant message so the
        # color check below is case-insensitive.
        agent_responses = []
        for event in self.collected_events:
            if not isinstance(event, MessageEvent):
                continue
            message = event.llm_message
            if message.role != "assistant":
                continue
            for content_item in message.content:
                if isinstance(content_item, TextContent):
                    agent_responses.append(content_item.text.lower())

        combined_response = " ".join(agent_responses)

        # NOTE(review): the prompt itself lists "yellow" as a candidate, so an
        # assistant reply that merely restates or negates the options would
        # also satisfy this substring check.
        if "yellow" in combined_response:
            return TestResult(
                success=True,
                reason="Agent successfully identified yellow color in the logo",
            )

        return TestResult(
            success=False,
            reason=(
                "Agent did not identify yellow color in the logo. "
                f"Response: {combined_response[:500]}"
            ),
        )

0 commit comments

Comments
 (0)