2424 get_label_coordinates ,
2525)
2626from operate .utils .ocr import get_text_coordinates , get_text_element
27- from operate .utils .screenshot import capture_screen_with_cursor
27+ from operate .utils .screenshot import capture_screen_with_cursor , compress_screenshot
2828from operate .utils .style import ANSI_BRIGHT_MAGENTA , ANSI_GREEN , ANSI_RED , ANSI_RESET
2929
3030# Load configuration
@@ -153,9 +153,13 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
153153 if not os .path .exists (screenshots_dir ):
154154 os .makedirs (screenshots_dir )
155155
156- screenshot_filename = os .path .join (screenshots_dir , "screenshot.png" )
157156 # Call the function to capture the screen with the cursor
158- capture_screen_with_cursor (screenshot_filename )
157+ raw_screenshot_filename = os .path .join (screenshots_dir , "raw_screenshot.png" )
158+ capture_screen_with_cursor (raw_screenshot_filename )
159+
160+ # Compress screenshot image to make size be smaller
161+ screenshot_filename = os .path .join (screenshots_dir , "screenshot.jpeg" )
162+ compress_screenshot (raw_screenshot_filename , screenshot_filename )
159163
160164 with open (screenshot_filename , "rb" ) as img_file :
161165 img_base64 = base64 .b64encode (img_file .read ()).decode ("utf-8" )
@@ -179,7 +183,7 @@ async def call_qwen_vl_with_ocr(messages, objective, model):
179183 messages .append (vision_message )
180184
181185 response = client .chat .completions .create (
182- model = "qwen2.5-vl-7b -instruct" ,
186+ model = "qwen2.5-vl-72b -instruct" ,
183187 messages = messages ,
184188 )
185189
0 commit comments