@@ -93,15 +93,15 @@ def smooth_move_to(x, y, duration=1.2):
9393
9494class ComputerTool (BaseAnthropicTool ):
9595 """
96- A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer .
96+ A tool that allows the agent to interact with the primary monitor's screen, keyboard, and mouse.
9797 The tool parameters are defined by Anthropic and are not editable.
9898 """
9999
100100 name : Literal ["computer" ] = "computer"
101101 api_type : Literal ["computer_20241022" ] = "computer_20241022"
102102 width : int
103103 height : int
104- display_num : int | None
104+ display_num : None # Always None: only the primary display is used
105105
106106 _screenshot_delay = 2.0
107107 _scaling_enabled = True
@@ -122,17 +122,8 @@ def to_params(self) -> BetaToolComputerUse20241022Param:
122122
123123 def __init__ (self ):
124124 super ().__init__ ()
125-
126125 self .width , self .height = pyautogui .size ()
127-
128- if (display_num := os .getenv ("DISPLAY_NUM" )) is not None :
129- self .display_num = int (display_num )
130- self ._display_prefix = f"DISPLAY=:{ self .display_num } "
131- else :
132- self .display_num = None
133- self ._display_prefix = ""
134-
135- self .xdotool = f"{ self ._display_prefix } xdotool"
126+ self .display_num = None
136127
137128 async def __call__ (
138129 self ,
@@ -230,7 +221,6 @@ def normalize_key(key):
230221
231222 async def screenshot (self ):
232223 """Take a screenshot of the current screen and return the base64 encoded image."""
233- # Use a user-writable directory for temporary files
234224 temp_dir = Path (tempfile .gettempdir ())
235225 path = temp_dir / f"screenshot_{ uuid4 ().hex } .png"
236226
@@ -241,9 +231,12 @@ async def screenshot(self):
241231 x , y = self .scale_coordinates (
242232 ScalingSource .COMPUTER , self .width , self .height
243233 )
244- await self .shell (
245- f"convert { path } -resize { x } x{ y } ! { path } " , take_screenshot = False
246- )
234+ # Resize with PIL directly instead of shelling out to the `convert` command
235+ from PIL import Image
236+
237+ with Image .open (path ) as img :
238+ img = img .resize ((x , y ), Image .Resampling .LANCZOS )
239+ img .save (path )
247240
248241 if path .exists ():
249242 base64_image = base64 .b64encode (path .read_bytes ()).decode ()
0 commit comments