Skip to content

Commit 939a473

Browse files
authored
Merge pull request #147 from autoscrape-labs/fix/script-execution
Improve script execution logic
2 parents 9025ed5 + cf67527 commit 939a473

File tree

5 files changed

+480
-21
lines changed

5 files changed

+480
-21
lines changed

pydoll/browser/tab.py

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
TypeAlias,
1313
Union,
1414
cast,
15+
overload,
1516
)
1617

1718
import aiofiles
@@ -32,6 +33,7 @@
3233
IFrameNotFound,
3334
InvalidFileExtension,
3435
InvalidIFrame,
36+
InvalidScriptWithElement,
3537
NetworkEventsNotEnabled,
3638
NoDialogPresent,
3739
NotAnIFrame,
@@ -43,9 +45,13 @@
4345
from pydoll.protocol.network.types import Cookie, CookieParam, NetworkLog
4446
from pydoll.protocol.page.events import PageEvent
4547
from pydoll.protocol.page.responses import CaptureScreenshotResponse, PrintToPDFResponse
46-
from pydoll.protocol.runtime.responses import EvaluateResponse
48+
from pydoll.protocol.runtime.responses import CallFunctionOnResponse, EvaluateResponse
4749
from pydoll.protocol.storage.responses import GetCookiesResponse
48-
from pydoll.utils import decode_base64_to_bytes
50+
from pydoll.utils import (
51+
decode_base64_to_bytes,
52+
has_return_outside_function,
53+
is_script_already_function,
54+
)
4955

5056
if TYPE_CHECKING:
5157
from pydoll.browser.chromium.base import Browser
@@ -554,7 +560,15 @@ async def handle_dialog(self, accept: bool, prompt_text: Optional[str] = None):
554560
PageCommands.handle_javascript_dialog(accept=accept, prompt_text=prompt_text)
555561
)
556562

557-
async def execute_script(self, script: str, element: Optional[WebElement] = None):
563+
@overload
564+
async def execute_script(self, script: str) -> EvaluateResponse: ...
565+
566+
@overload
567+
async def execute_script(self, script: str, element: WebElement) -> CallFunctionOnResponse: ...
568+
569+
async def execute_script(
570+
self, script: str, element: Optional[WebElement] = None
571+
) -> Union[EvaluateResponse, CallFunctionOnResponse]:
558572
"""
559573
Execute JavaScript in page context.
560574
@@ -565,17 +579,17 @@ async def execute_script(self, script: str, element: Optional[WebElement] = None
565579
Examples:
566580
await page.execute_script('argument.click()', element)
567581
await page.execute_script('argument.value = "Hello"', element)
582+
583+
Raises:
584+
InvalidScriptWithElement: If script contains 'argument' but no element is provided.
568585
"""
586+
if 'argument' in script and element is None:
587+
raise InvalidScriptWithElement('Script contains "argument" but no element was provided')
588+
569589
if element:
570-
script = script.replace('argument', 'this')
571-
script = f'function(){{ {script} }}'
572-
object_id = element._object_id
573-
command = RuntimeCommands.call_function_on(
574-
object_id=object_id, function_declaration=script, return_by_value=True
575-
)
576-
else:
577-
command = RuntimeCommands.evaluate(expression=script)
578-
return await self._execute_command(command)
590+
return await self._execute_script_with_element(script, element)
591+
592+
return await self._execute_script_without_element(script)
579593

580594
@asynccontextmanager
581595
async def expect_file_chooser(
@@ -693,6 +707,46 @@ async def callback_wrapper(event):
693707
event_name, function_to_register, temporary
694708
)
695709

710+
async def _execute_script_with_element(self, script: str, element: WebElement):
    """
    Execute a script with an element bound as its context.

    Every whole-word occurrence of the placeholder ``argument`` is rewritten
    to ``this`` and the script is run through ``Runtime.callFunctionOn``
    against the element's remote object, so ``this`` refers to the element.

    Args:
        script: JavaScript code to execute. It must reference the element
            through the ``argument`` placeholder.
        element: Element whose ``_object_id`` is used as the call target.

    Returns:
        The response of the ``call_function_on`` command.

    Raises:
        InvalidScriptWithElement: If the script does not contain "argument".
    """
    # Local import keeps this method self-contained; the module's top-level
    # imports are not guaranteed to include ``re``.
    import re

    if 'argument' not in script:
        raise InvalidScriptWithElement('Script does not contain "argument"')

    # Replace the placeholder only when it is a whole word. A plain
    # ``str.replace`` also rewrote JavaScript's built-in ``arguments`` into
    # ``thiss`` and corrupted any identifier that merely contains "argument".
    script = re.sub(r'\bargument\b', 'this', script)

    # callFunctionOn expects a function declaration; wrap bare statements.
    if not is_script_already_function(script):
        script = f'function(){{ {script} }}'

    command = RuntimeCommands.call_function_on(
        object_id=element._object_id, function_declaration=script, return_by_value=True
    )
    return await self._execute_command(command)
733+
734+
async def _execute_script_without_element(self, script: str):
    """
    Evaluate a script in the page context, with no element bound.

    A script that uses ``return`` outside of any function is wrapped in an
    immediately-invoked function expression so the ``return`` is legal and
    its value becomes the result of the evaluation.

    Args:
        script: JavaScript code to execute.

    Returns:
        The response of the ``evaluate`` command.
    """
    needs_wrapper = has_return_outside_function(script)
    expression = f'(function(){{ {script} }})()' if needs_wrapper else script
    return await self._execute_command(RuntimeCommands.evaluate(expression=expression))
749+
696750
async def _refresh_if_url_not_changed(self, url: str) -> bool:
697751
"""Refresh page if URL hasn't changed."""
698752
current_url = await self.current_url

pydoll/exceptions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,3 +239,15 @@ class NetworkEventsNotEnabled(PydollException):
239239
"""Raised when network events are not enabled."""
240240

241241
message = 'Network events not enabled'
242+
243+
244+
class ScriptException(PydollException):
245+
"""
Base class for exceptions related to JavaScript execution.

Catch this type to handle every script-related error at once;
InvalidScriptWithElement derives from it.
"""
246+
247+
# NOTE(review): presumably the default text PydollException reports when no
# explicit message is passed to the constructor - confirm against the base class.
message = 'A script execution error occurred'
248+
249+
250+
class InvalidScriptWithElement(ScriptException):
251+
"""
Raised when a script and the element passed with it are inconsistent.

Two situations raise it, each with an explicit message at the raise site:
the script contains 'argument' but no element is provided, or an element
is provided but the script does not contain 'argument'.
"""
252+
253+
# Wording covers only the "no element provided" case; the other raise site
# passes its own message.
message = 'Script contains "argument" but no element was provided'

pydoll/utils.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import logging
33
import os
4+
import re
45

56
import aiohttp
67

@@ -70,3 +71,96 @@ def validate_browser_paths(paths: list[str]) -> str:
7071
if os.path.exists(path) and os.access(path, os.X_OK):
7172
return path
7273
raise InvalidBrowserPath(f'No valid browser path found in: {paths}')
74+
75+
76+
# One alternation scanned left to right, so whichever construct starts first
# wins: a '//' inside a string literal is never treated as a comment, and a
# quote inside a comment never opens a string. String bodies accept
# backslash escapes so an escaped quote does not end the literal early.
_JS_COMMENT_OR_STRING = re.compile(
    r'(?P<comment>//[^\n]*|/\*.*?\*/)'
    r'|"(?:\\.|[^"\\])*"'
    r"|'(?:\\.|[^'\\])*'"
    r'|`(?:\\.|[^`\\])*`',
    re.DOTALL,
)


def clean_script_for_analysis(script: str) -> str:
    """
    Clean JavaScript code by removing comments and blanking string literals.

    This helps avoid false positives when analyzing script structure: text
    inside comments and strings can no longer be mistaken for keywords or
    braces.

    Line and block comments are removed entirely. Double-quoted,
    single-quoted and template literals are replaced by an empty literal of
    the same quote style (``""``, ``''`` or two backticks).

    Regular-expression literals are not recognised, so quotes or slashes
    inside one may still be misread.

    Args:
        script: JavaScript code to clean.

    Returns:
        str: Cleaned script with comments removed and strings emptied.
    """

    def _blank(match):
        # Comments vanish; strings keep their delimiters but lose their body.
        if match.group('comment') is not None:
            return ''
        quote = match.group()[0]
        return quote * 2

    return _JS_COMMENT_OR_STRING.sub(_blank, script)
100+
101+
102+
def is_script_already_function(script: str) -> bool:
    """
    Tell whether a JavaScript snippet begins with a function expression.

    After comments and string contents are stripped, the snippet counts as a
    function when it starts with an anonymous ``function (...) {`` or with a
    parenthesised arrow function ``(...) => {``. Named functions and arrow
    functions without a parenthesised parameter list are not matched.

    Args:
        script: JavaScript code to analyze.

    Returns:
        bool: True if script is already a function, False otherwise.
    """
    candidate = clean_script_for_analysis(script).strip()
    leading_function_patterns = (
        r'^\s*function\s*\([^)]*\)\s*\{',
        r'^\s*\([^)]*\)\s*=>\s*\{',
    )
    return any(re.match(pattern, candidate) for pattern in leading_function_patterns)
121+
122+
123+
# Structural tokens needed to follow function scopes: the ``function``
# keyword, an arrow that opens a block body, ``return``, and brackets.
_JS_STRUCTURE_TOKEN = re.compile(r'\bfunction\b|=>\s*\{|\breturn\b|[{}()]')


def has_return_outside_function(script: str) -> bool:
    """
    Check if a JavaScript script has return statements outside of functions.

    The script is scanned token by token while tracking which open braces
    belong to a function body, so the result does not depend on how the code
    is split across lines. A function written on a single line, such as
    ``function f(){ return 1 }; f()``, is therefore not reported.

    Only bodies introduced by the ``function`` keyword or by ``=> {`` are
    treated as functions; method shorthand in classes and object literals is
    not recognised and a ``return`` inside one is reported as top-level.

    Args:
        script: JavaScript code to analyze.

    Returns:
        bool: True if script has return outside function, False otherwise.
    """
    cleaned_script = clean_script_for_analysis(script)

    # If already a function, no need to check
    if is_script_already_function(cleaned_script):
        return False

    # One entry per open brace: True when that brace lies inside a function.
    scope_stack = []
    # Paren depth at which each not-yet-opened ``function`` keyword was seen.
    # Its body is the next '{' at that same depth, which skips braces that
    # appear inside the parameter list (e.g. default values).
    pending_functions = []
    paren_depth = 0

    for match in _JS_STRUCTURE_TOKEN.finditer(cleaned_script):
        token = match.group()
        if token == 'function':
            pending_functions.append(paren_depth)
        elif token == '(':
            paren_depth += 1
        elif token == ')':
            paren_depth = max(paren_depth - 1, 0)
        elif token == '}':
            # Tolerate unbalanced input instead of raising.
            if scope_stack:
                scope_stack.pop()
        elif token == 'return':
            if not (scope_stack and scope_stack[-1]):
                return True
        else:
            # Either a bare '{' or an arrow body '=> {'.
            opens_function = token != '{'
            if not opens_function and pending_functions:
                if pending_functions[-1] == paren_depth:
                    pending_functions.pop()
                    opens_function = True
            # Plain blocks inherit the function status of their parent.
            inside_function = bool(scope_stack and scope_stack[-1])
            scope_stack.append(opens_function or inside_function)

    return False

0 commit comments

Comments
 (0)