44import os
55import platform
66import sys
7+ import time
78import traceback
89import uuid
910from datetime import datetime
1011from typing import Any , cast
1112
12- from prompt_toolkit import PromptSession
1313from readchar import readchar
1414
1515from .misc .get_input import get_input
2424litellm .suppress_debug_info = True
2525litellm .REPEATED_STREAMING_CHUNK_LIMIT = 99999999
2626litellm .modify_params = True
27+ # litellm.drop_params = True
2728
2829from anthropic import Anthropic
2930from anthropic .types .beta import (
@@ -245,6 +246,10 @@ async def async_respond(self):
245246 provider = self .provider # Keep existing provider if set
246247 max_tokens = self .max_tokens # Keep existing max_tokens if set
247248
249+ if self .model == "claude-3-5-sonnet-latest" :
250+ # LiteLLM cannot find model info for claude-3-5-sonnet-latest, so set the provider explicitly
251+ provider = "anthropic"
252+
248253 # Only try to get model info if we need either provider or max_tokens
249254 if provider is None or max_tokens is None :
250255 try :
@@ -610,8 +615,53 @@ async def async_respond(self):
610615 }
611616 )
612617 if "gui" in self .tools :
613- print ("\n GUI is not supported for non-Anthropic models yet.\n " )
614- pass
618+ tools .append (
619+ {
620+ "type" : "function" ,
621+ "function" : {
622+ "name" : "computer" ,
623+ "description" : """Control the computer's mouse, keyboard and screen interactions
624+ * Coordinates are scaled to standard resolutions (max 1366x768)
625+ * Screenshots are automatically taken after most actions
626+ * For key commands, use normalized key names (e.g. 'pagedown' -> 'pgdn', 'enter'/'return' are interchangeable)
627+ * On macOS, 'super+' is automatically converted to 'command+'
628+ * Mouse movements use smooth easing for natural motion""" ,
629+ "parameters" : {
630+ "type" : "object" ,
631+ "properties" : {
632+ "action" : {
633+ "type" : "string" ,
634+ "description" : "The action to perform" ,
635+ "enum" : [
636+ "key" , # Send keyboard input (hotkeys or single keys)
637+ "type" , # Type text with a slight delay between characters
638+ "mouse_move" , # Move mouse cursor to coordinates
639+ "left_click" , # Perform left mouse click
640+ "left_click_drag" , # Click and drag from current pos to coordinates
641+ "right_click" , # Perform right mouse click
642+ "middle_click" , # Perform middle mouse click
643+ "double_click" , # Perform double left click
644+ "screenshot" , # Take a screenshot
645+ "cursor_position" , # Get current cursor coordinates
646+ ],
647+ },
648+ "text" : {
649+ "type" : "string" ,
650+ "description" : "Text to type or key command to send (required for 'key' and 'type' actions)" ,
651+ },
652+ "coordinate" : {
653+ "type" : "array" ,
654+ "description" : "X,Y coordinates for mouse actions (required for 'mouse_move' and 'left_click_drag')" ,
655+ "items" : {"type" : "integer" },
656+ "minItems" : 2 ,
657+ "maxItems" : 2 ,
658+ },
659+ },
660+ "required" : ["action" ],
661+ },
662+ },
663+ }
664+ )
615665
616666 if self .model .startswith ("ollama/" ):
617667 # Fix ollama
@@ -645,6 +695,7 @@ async def async_respond(self):
645695 "temperature" : self .temperature ,
646696 "api_key" : self .api_key ,
647697 "api_version" : self .api_version ,
698+ "parallel_tool_calls" : False ,
648699 }
649700
650701 if self .tool_calling :
@@ -658,7 +709,6 @@ async def async_respond(self):
658709
659710 if self .debug :
660711 print ("Sending request..." , params )
661- import time
662712
663713 time .sleep (3 )
664714
@@ -815,13 +865,36 @@ async def async_respond(self):
815865 result = ToolResult (output = "Tool execution cancelled by user" )
816866
817867 if self .tool_calling :
818- self .messages .append (
819- {
820- "role" : "tool" ,
821- "content" : json .dumps (dataclasses .asdict (result )),
822- "tool_call_id" : tool_call .id ,
823- }
824- )
868+ if result .base64_image :
869+ # Send a text placeholder as the tool result, then attach the image in a follow-up user message
870+ self .messages .append (
871+ {
872+ "role" : "tool" ,
873+ "content" : "The user will reply with the image outputted by the tool." ,
874+ "tool_call_id" : tool_call .id ,
875+ }
876+ )
877+ self .messages .append (
878+ {
879+ "role" : "user" ,
880+ "content" : [
881+ {
882+ "type" : "image_url" ,
883+ "image_url" : {
884+ "url" : f"data:image/png;base64,{ result .base64_image } " ,
885+ },
886+ }
887+ ],
888+ }
889+ )
890+ else :
891+ self .messages .append (
892+ {
893+ "role" : "tool" ,
894+ "content" : json .dumps (dataclasses .asdict (result )),
895+ "tool_call_id" : tool_call .id ,
896+ }
897+ )
825898 else :
826899 self .messages .append (
827900 {
0 commit comments