1+ import time
2+
3+ import html2text
14import requests
5+ from selenium import webdriver
6+ from selenium .webdriver .chrome .service import Service
7+ from selenium .webdriver .common .by import By
8+ from selenium .webdriver .common .keys import Keys
9+ from webdriver_manager .chrome import ChromeDriverManager
210
311
412class Browser :
513 def __init__ (self , computer ):
614 self .computer = computer
15+ self ._driver = None
16+
17+ @property
18+ def driver (self ):
19+ if self ._driver is None :
20+ self .setup ()
21+ return self ._driver
22+
23+ @driver .setter
24+ def driver (self , value ):
25+ self ._driver = value
726
827 def search (self , query ):
928 """
@@ -14,3 +33,89 @@ def search(self, query):
1433 params = {"query" : query },
1534 )
1635 return response .json ()["result" ]
36+
37+ def setup (self ):
38+ self .service = Service (ChromeDriverManager ().install ())
39+ self .options = webdriver .ChromeOptions ()
40+ self ._driver = webdriver .Chrome (service = self .service , options = self .options )
41+
42+ def go_to_url (self , url ):
43+ """Navigate to a URL"""
44+ self .driver .get (url )
45+ time .sleep (3 )
46+
47+ def search_google (self , query ):
48+ """Perform a Google search"""
49+ self .driver .get ("https://www.perplexity.ai" )
50+ # search_box = self.driver.find_element(By.NAME, 'q')
51+ # search_box.send_keys(query)
52+ # search_box.send_keys(Keys.RETURN)
53+ body = self .driver .find_element (By .TAG_NAME , "body" )
54+ body .send_keys (Keys .COMMAND + "k" )
55+ time .sleep (0.5 )
56+ active_element = self .driver .switch_to .active_element
57+ active_element .send_keys (query )
58+ active_element .send_keys (Keys .RETURN )
59+ time .sleep (5 )
60+
61+ def analyze_page (self , intent ):
62+ """Extract HTML, list interactive elements, and analyze with AI"""
63+ html_content = self .driver .page_source
64+ text_content = html2text .html2text (html_content )
65+
66+ elements = (
67+ self .driver .find_elements (By .TAG_NAME , "a" )
68+ + self .driver .find_elements (By .TAG_NAME , "button" )
69+ + self .driver .find_elements (By .TAG_NAME , "input" )
70+ + self .driver .find_elements (By .TAG_NAME , "select" )
71+ )
72+
73+ elements_info = [
74+ {
75+ "id" : idx ,
76+ "text" : elem .text ,
77+ "attributes" : elem .get_attribute ("outerHTML" ),
78+ }
79+ for idx , elem in enumerate (elements )
80+ ]
81+
82+ ai_query = f"""
83+ Below is the content of the current webpage along with interactive elements.
84+ Given the intent "{ intent } ", please extract useful information and provide sufficient details
85+ about interactive elements, focusing especially on those pertinent to the provided intent.
86+
87+ If the information requested by the intent "{ intent } " is present on the page, simply return that.
88+
89+ If not, return the top 10 most relevant interactive elements in a concise, actionable format, listing them on separate lines
90+ with their ID, a description, and their possible action.
91+
92+ Do not hallucinate.
93+
94+ Page Content:
95+ { text_content }
96+
97+ Interactive Elements:
98+ { elements_info }
99+ """
100+
101+ # response = self.computer.ai.chat(ai_query)
102+
103+ # screenshot = self.driver.get_screenshot_as_base64()
104+ # old_model = self.computer.interpreter.llm.model
105+ # self.computer.interpreter.llm.model = "gpt-4o-mini"
106+ # response = self.computer.ai.chat(ai_query, base64=screenshot)
107+ # self.computer.interpreter.llm.model = old_model
108+
109+ old_model = self .computer .interpreter .llm .model
110+ self .computer .interpreter .llm .model = "gpt-4o-mini"
111+ response = self .computer .ai .chat (ai_query )
112+ self .computer .interpreter .llm .model = old_model
113+
114+ print (response )
115+ print (
116+ "Please now utilize this information or interact with the interactive elements provided to answer the user's query."
117+ )
118+
119+ def quit (self ):
120+ """Close the browser"""
121+ self .driver .quit ()
0 commit comments