Skip to content

Commit 1757b8e

Browse files
committed
feat: logged task execution results
1 parent 3c24013 commit 1757b8e

File tree

5 files changed

+84
-48
lines changed

5 files changed

+84
-48
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@
1818
utils/reload/*.js
1919
utils/reload/injections/*.js
2020
public/manifest.json
21+
22+
# test results
23+
/results

benchmark.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def setup_driver():
3333
chrome_options.add_argument("--window-size=1600,900")
3434
service = Service(ChromeDriverManager().install())
3535
driver = webdriver.Chrome(service=service, options=chrome_options)
36-
# Set script timeout to 240 seconds
37-
driver.set_script_timeout(240)
36+
# Set script timeout to 120 seconds
37+
driver.set_script_timeout(120)
3838
return driver
3939

4040
def dispatch_event(driver, event_name, event):
@@ -46,7 +46,7 @@ def dispatch_event(driver, event_name, event):
4646
driver.execute_script(script)
4747

4848
def add_task_listener(driver, task_id, max_retries=3):
49-
logging.info(f'Adding task listener for task {task_id}')
49+
logging.info(f'Adding task listeners for task {task_id}')
5050
"""
5151
Add event listeners for task history and screenshot events. Both events include task status.
5252
Then process those events as they are captured.
@@ -79,16 +79,25 @@ def add_task_listener(driver, task_id, max_retries=3):
7979
"""
8080

8181
attempts = 0
82+
result = ""
8283

8384
def handle_event(event_data):
8485
nonlocal attempts
86+
nonlocal result
8587
if not event_data:
8688
logging.info("No event data received")
8789
return
8890
if event_data['type'] == 'history':
8991
# Record history when task stops
90-
logging.info(f'Task {task_id} status: {event_data["status"]}')
91-
write_history(task_id, event_data['data'])
92+
result = event_data["status"]
93+
# Derive the result from the last recorded action: only a final "finish" operation counts as success
94+
history = event_data['data']
95+
last_action = history[-1]["action"]["operation"]["name"]
96+
if last_action == "finish":
97+
result = "success"
98+
else:
99+
result = "fail"
100+
write_history(task_id, history)
92101
attempts = 0
93102
return
94103
if event_data['type'] == 'screenshot':
@@ -98,7 +107,8 @@ def handle_event(event_data):
98107
handle_event(driver.execute_async_script(script))
99108
else:
100109
logging.error(f"Unhandled event data type: {event_data['type']}")
101-
raise ValueError(f"Unhandled event data type: {event_data['type']}")
110+
result = "script-error"
111+
return
102112

103113
while attempts < max_retries:
104114
try:
@@ -108,15 +118,25 @@ def handle_event(event_data):
108118
if "javascript error: document unloaded while waiting for result" in str(e):
109119
attempts += 1
110120
logging.warning(f'Document unloaded error during task {task_id} attempt {attempts}: {str(e)}')
111-
logging.info("Retrying...")
112121
if attempts == max_retries:
113122
logging.error(f'Maximum retry attempts reached for task {task_id}.')
123+
result = 'doc-unload-max-retry'
124+
break
125+
else:
126+
logging.info("Retrying...")
127+
elif "script timeout" in str(e):
128+
logging.error(f'Script timeout for task {task_id}: {str(e)}')
129+
result = 'js-script-timeout'
130+
break
114131
else:
115132
logging.error(f'WebDriver exception for task {task_id}: {str(e)}')
133+
result = 'webdriver-error'
116134
break
117135
except Exception as e:
118136
logging.error(f'Unhandled error for task {task_id}: {str(e)}')
137+
result = 'python-script-error'
119138
break
139+
return result
120140

121141
def write_history(task_id, task_history):
122142
task_dir = os.path.join(results_dir, f"test{task_id}")
@@ -143,9 +163,10 @@ def run_webwand_task(driver, task_id, task_description):
143163
dispatch_event(driver, 'SetAPIKey', {"value": api_key})
144164
dispatch_event(driver, 'SetTask', {"value": task_description})
145165
dispatch_event(driver, 'RunTask', {})
146-
add_task_listener(driver, task_id)
166+
result = add_task_listener(driver, task_id)
147167
end = time.time()
148168
logging.info(f'Task {task_id} took {end - start} seconds to complete.')
169+
return result
149170

150171
def click_extensions_icon(driver):
151172
# Simulate click to open side panel
@@ -173,7 +194,13 @@ def main():
173194
click_extensions_icon(driver)
174195
initial_load = False
175196

176-
run_webwand_task(driver, task_id, task['ques'])
197+
result = run_webwand_task(driver, task_id, task['ques'])
198+
logging.info(f'Task {task_id} status: {result}')
199+
# Optional: if the task timed out, reset the driver so that the next task starts from a clean state
200+
# if result == "js-script-timeout":
201+
# driver.quit()
202+
# driver = setup_driver()
203+
# initial_load = True
177204
driver.quit()
178205

179206
if __name__ == "__main__":

results/testGoogle--0/interact_messages.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
},
99
"name": "setValueAndEnter"
1010
},
11-
"thought": "I will enter 'bananas' into the search bar and press Enter to search."
11+
"thought": "I will enter 'bananas' in the search bar and press Enter to search."
1212
},
13-
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 2:57:41 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
14-
"response": "{\n \"thought\": \"I will enter 'bananas' into the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
13+
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 3:32:45 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
14+
"response": "{\n \"thought\": \"I will enter 'bananas' in the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
1515
"usage": {
1616
"completion_tokens": 63,
1717
"prompt_tokens": 2599,
@@ -23,14 +23,14 @@
2323
"operation": {
2424
"name": "finish"
2525
},
26-
"thought": "The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information."
26+
"thought": "The search results for 'bananas' are displayed. The top result is a Wikipedia link about bananas, followed by a 'People also ask' section with questions about the benefits, daily consumption, nutritional content, and best time to eat bananas."
2727
},
28-
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' into the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 2:57:49 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 22.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel 
= 24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
29-
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
28+
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' in the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 3:32:54 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 20.3%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 
24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
29+
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The top result is a Wikipedia link about bananas, followed by a 'People also ask' section with questions about the benefits, daily consumption, nutritional content, and best time to eat bananas.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
3030
"usage": {
31-
"completion_tokens": 50,
31+
"completion_tokens": 70,
3232
"prompt_tokens": 3147,
33-
"total_tokens": 3197
33+
"total_tokens": 3217
3434
}
3535
}
3636
]

0 commit comments

Comments
 (0)