3434
3535# Load configuration
3636config = Config ()
37- VERBOSE = config .verbose
38-
3937
4038async def get_next_action (model , messages , objective , session_id ):
41- if VERBOSE :
39+ if config . verbose :
4240 print ("[Self-Operating Computer][get_next_action]" )
4341 print ("[Self-Operating Computer][get_next_action] model" , model )
4442 if model == "gpt-4" :
@@ -61,7 +59,7 @@ async def get_next_action(model, messages, objective, session_id):
6159
6260
6361def call_gpt_4_vision_preview (messages ):
64- if VERBOSE :
62+ if config . verbose :
6563 print ("[call_gpt_4_v]" )
6664 time .sleep (1 )
6765 client = config .initialize_openai ()
@@ -82,7 +80,7 @@ def call_gpt_4_vision_preview(messages):
8280 else :
8381 user_prompt = get_user_prompt ()
8482
85- if VERBOSE :
83+ if config . verbose :
8684 print (
8785 "[call_gpt_4_v] user_prompt" ,
8886 user_prompt ,
@@ -117,7 +115,7 @@ def call_gpt_4_vision_preview(messages):
117115 content = content [: - len ("```" )] # Remove ending
118116
119117 assistant_message = {"role" : "assistant" , "content" : content }
120- if VERBOSE :
118+ if config . verbose :
121119 print (
122120 "[call_gpt_4_v] content" ,
123121 content ,
@@ -137,7 +135,7 @@ def call_gpt_4_vision_preview(messages):
137135 f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_RED } [Error] AI response was { ANSI_RESET } " ,
138136 content ,
139137 )
140- if VERBOSE :
138+ if config . verbose :
141139 traceback .print_exc ()
142140 return call_gpt_4_vision_preview (messages )
143141
@@ -146,7 +144,7 @@ def call_gemini_pro_vision(messages, objective):
146144 """
147145 Get the next action for Self-Operating Computer using Gemini Pro Vision
148146 """
149- if VERBOSE :
147+ if config . verbose :
150148 print (
151149 "[Self Operating Computer][call_gemini_pro_vision]" ,
152150 )
@@ -165,18 +163,18 @@ def call_gemini_pro_vision(messages, objective):
165163 prompt = get_system_prompt ("gemini-pro-vision" , objective )
166164
167165 model = config .initialize_google ()
168- if VERBOSE :
166+ if config . verbose :
169167 print ("[call_gemini_pro_vision] model" , model )
170168
171169 response = model .generate_content ([prompt , Image .open (screenshot_filename )])
172170
173171 content = response .text [1 :]
174- if VERBOSE :
172+ if config . verbose :
175173 print ("[call_gemini_pro_vision] response" , response )
176174 print ("[call_gemini_pro_vision] content" , content )
177175
178176 content = json .loads (content )
179- if VERBOSE :
177+ if config . verbose :
180178 print (
181179 "[get_next_action][call_gemini_pro_vision] content" ,
182180 content ,
@@ -188,14 +186,14 @@ def call_gemini_pro_vision(messages, objective):
188186 print (
189187 f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_BRIGHT_MAGENTA } [Operate] That did not work. Trying another method { ANSI_RESET } "
190188 )
191- if VERBOSE :
189+ if config . verbose :
192190 print ("[Self-Operating Computer][Operate] error" , e )
193191 traceback .print_exc ()
194192 return call_gpt_4_vision_preview (messages )
195193
196194
197195async def call_gpt_4_vision_preview_ocr (messages , objective , model ):
198- if VERBOSE :
196+ if config . verbose :
199197 print ("[call_gpt_4_vision_preview_ocr]" )
200198
201199 # Construct the path to the file within the package
@@ -260,7 +258,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
260258 # Normalize line breaks and remove any unwanted characters
261259 content = "\n " .join (line .strip () for line in content .splitlines ())
262260
263- if VERBOSE :
261+ if config . verbose :
264262 print (
265263 "\n \n \n [call_gpt_4_vision_preview_ocr] content after cleaning" , content
266264 )
@@ -274,7 +272,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
274272 for operation in content :
275273 if operation .get ("operation" ) == "click" :
276274 text_to_click = operation .get ("text" )
277- if VERBOSE :
275+ if config . verbose :
278276 print (
279277 "[call_gpt_4_vision_preview_ocr][click] text_to_click" ,
280278 text_to_click ,
@@ -296,7 +294,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
296294 operation ["x" ] = coordinates ["x" ]
297295 operation ["y" ] = coordinates ["y" ]
298296
299- if VERBOSE :
297+ if config . verbose :
300298 print (
301299 "[call_gpt_4_vision_preview_ocr][click] text_element_index" ,
302300 text_element_index ,
@@ -324,7 +322,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
324322 print (
325323 f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_BRIGHT_MAGENTA } [Operate] That did not work. Trying another method { ANSI_RESET } "
326324 )
327- if VERBOSE :
325+ if config . verbose :
328326 print ("[Self-Operating Computer][Operate] error" , e )
329327 traceback .print_exc ()
330328 return gpt_4_fallback (messages , objective , model )
@@ -356,7 +354,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
356354 else :
357355 user_prompt = get_user_prompt ()
358356
359- if VERBOSE :
357+ if config . verbose :
360358 print (
361359 "[call_gpt_4_vision_preview_labeled] user_prompt" ,
362360 user_prompt ,
@@ -393,7 +391,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
393391 content = content [: - len ("```" )] # Remove ending
394392
395393 assistant_message = {"role" : "assistant" , "content" : content }
396- if VERBOSE :
394+ if config . verbose :
397395 print (
398396 "[call_gpt_4_vision_preview_labeled] content" ,
399397 content ,
@@ -407,14 +405,14 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
407405 for operation in content :
408406 if operation .get ("operation" ) == "click" :
409407 label = operation .get ("label" )
410- if VERBOSE :
408+ if config . verbose :
411409 print (
412410 "[Self Operating Computer][call_gpt_4_vision_preview_labeled] label" ,
413411 label ,
414412 )
415413
416414 coordinates = get_label_coordinates (label , label_coordinates )
417- if VERBOSE :
415+ if config . verbose :
418416 print (
419417 "[Self Operating Computer][call_gpt_4_vision_preview_labeled] coordinates" ,
420418 coordinates ,
@@ -426,7 +424,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
426424 click_position_percent = get_click_position_in_percent (
427425 coordinates , image_size
428426 )
429- if VERBOSE :
427+ if config . verbose :
430428 print (
431429 "[Self Operating Computer][call_gpt_4_vision_preview_labeled] click_position_percent" ,
432430 click_position_percent ,
@@ -441,7 +439,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
441439 y_percent = f"{ click_position_percent [1 ]:.2f} "
442440 operation ["x" ] = x_percent
443441 operation ["y" ] = y_percent
444- if VERBOSE :
442+ if config . verbose :
445443 print (
446444 "[Self Operating Computer][call_gpt_4_vision_preview_labeled] new click operation" ,
447445 operation ,
@@ -450,7 +448,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
450448 else :
451449 processed_content .append (operation )
452450
453- if VERBOSE :
451+ if config . verbose :
454452 print (
455453 "[Self Operating Computer][call_gpt_4_vision_preview_labeled] new processed_content" ,
456454 processed_content ,
@@ -461,14 +459,14 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
461459 print (
462460 f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_BRIGHT_MAGENTA } [Operate] That did not work. Trying another method { ANSI_RESET } "
463461 )
464- if VERBOSE :
462+ if config . verbose :
465463 print ("[Self-Operating Computer][Operate] error" , e )
466464 traceback .print_exc ()
467465 return call_gpt_4_vision_preview (messages )
468466
469467
470468def call_ollama_llava (messages ):
471- if VERBOSE :
469+ if config . verbose :
472470 print ("[call_ollama_llava]" )
473471 time .sleep (1 )
474472 try :
@@ -485,7 +483,7 @@ def call_ollama_llava(messages):
485483 else :
486484 user_prompt = get_user_prompt ()
487485
488- if VERBOSE :
486+ if config . verbose :
489487 print (
490488 "[call_ollama_llava] user_prompt" ,
491489 user_prompt ,
@@ -516,7 +514,7 @@ def call_ollama_llava(messages):
516514 content = content [: - len ("```" )] # Remove ending
517515
518516 assistant_message = {"role" : "assistant" , "content" : content }
519- if VERBOSE :
517+ if config . verbose :
520518 print (
521519 "[call_ollama_llava] content" ,
522520 content ,
@@ -542,7 +540,7 @@ def call_ollama_llava(messages):
542540 f"{ ANSI_GREEN } [Self-Operating Computer]{ ANSI_RED } [Error] AI response was { ANSI_RESET } " ,
543541 content ,
544542 )
545- if VERBOSE :
543+ if config . verbose :
546544 traceback .print_exc ()
547545 return call_ollama_llava (messages )
548546
@@ -562,15 +560,15 @@ def get_last_assistant_message(messages):
562560
563561
564562def gpt_4_fallback (messages , objective , model ):
565- if VERBOSE :
563+ if config . verbose :
566564 print ("[gpt_4_fallback]" )
567565 system_prompt = get_system_prompt ("gpt-4-vision-preview" , objective )
568566 new_system_message = {"role" : "system" , "content" : system_prompt }
569567 # remove and replace the first message in `messages` with `new_system_message`
570568
571569 messages [0 ] = new_system_message
572570
573- if VERBOSE :
571+ if config . verbose :
574572 print ("[gpt_4_fallback][updated]" )
575573 print ("[gpt_4_fallback][updated] len(messages)" , len (messages ))
576574
@@ -581,7 +579,7 @@ def confirm_system_prompt(messages, objective, model):
581579 """
582580 On `Exception` we default to `call_gpt_4_vision_preview` so we have this function to reassign system prompt in case of a previous failure
583581 """
584- if VERBOSE :
582+ if config . verbose :
585583 print ("[confirm_system_prompt] model" , model )
586584
587585 system_prompt = get_system_prompt (model , objective )
@@ -590,7 +588,7 @@ def confirm_system_prompt(messages, objective, model):
590588
591589 messages [0 ] = new_system_message
592590
593- if VERBOSE :
591+ if config . verbose :
594592 print ("[confirm_system_prompt]" )
595593 print ("[confirm_system_prompt] len(messages)" , len (messages ))
596594 for m in messages :
0 commit comments