Skip to content

Commit f781cfe

Browse files
committed
Pass model to operate
1 parent e253790 commit f781cfe

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

evaluate.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -111,10 +111,10 @@ def evaluate_final_screenshot(guideline):
111 111
return parse_eval_content(eval_content)
112 112

113 113

114-
def run_test_case(objective, guideline):
115-
'''Returns True if the result of the test with the given prompt meets the given guideline.'''
116-
# Run `operate` with the test case prompt
117-
subprocess.run(['operate', '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL)
114+
def run_test_case(objective, guideline, model):
115+
'''Returns True if the result of the test with the given prompt meets the given guideline for the given model.'''
116+
# Run `operate` with the model to evaluate and the test case prompt
117+
subprocess.run(['operate', '-m', model, '--prompt', f'"{objective}"'], stdout=subprocess.DEVNULL)
118 118

119 119
try:
120 120
result = evaluate_final_screenshot(guideline)
@@ -154,7 +154,7 @@ def main():
154 154
for objective, guideline in TEST_CASES.items():
155 155
print(f"{ANSI_BLUE}[EVALUATING]{ANSI_RESET} '{objective}'")
156 156

157-
result = run_test_case(objective, guideline)
157+
result = run_test_case(objective, guideline, model)
158 158
if result:
159 159
print(f"{ANSI_GREEN}[PASSED]{ANSI_RESET} '{objective}'")
160 160
passed += 1

0 commit comments

Comments
 (0)