|
1 | 1 | import base64 |
2 | 2 | import logging |
3 | 3 | import os |
| 4 | +import re |
4 | 5 |
|
5 | 6 | import aiohttp |
6 | 7 |
|
@@ -70,3 +71,96 @@ def validate_browser_paths(paths: list[str]) -> str: |
70 | 71 | if os.path.exists(path) and os.access(path, os.X_OK): |
71 | 72 | return path |
72 | 73 | raise InvalidBrowserPath(f'No valid browser path found in: {paths}') |
| 74 | + |
| 75 | + |
# Matches, in a single left-to-right scan, every JavaScript comment or string
# literal. Scanning in one pass (instead of one substitution per construct)
# ensures that a '//' inside a string is not mistaken for a comment, and that
# a quote character inside a comment is not mistaken for a string delimiter.
_JS_COMMENT_OR_STRING = re.compile(
    r"""
      //[^\n]*                # line comment, up to (not including) the newline
    | /\*.*?\*/               # block comment, possibly spanning several lines
    | "(?:\\.|[^"\\])*"       # double-quoted string, honoring backslash escapes
    | '(?:\\.|[^'\\])*'       # single-quoted string, honoring backslash escapes
    | `(?:\\.|[^`\\])*`       # template literal, honoring backslash escapes
    """,
    re.DOTALL | re.VERBOSE,
)


def clean_script_for_analysis(script: str) -> str:
    """
    Clean JavaScript code by removing comments and string literals.

    This helps avoid false positives when analyzing script structure.
    Comments are deleted; every string or template literal is replaced by an
    empty literal of the same quote style (``""``, ``''`` or two backticks),
    so the surrounding code keeps its shape.

    Comments and strings are recognized in one pass over the script, so
    comment markers inside strings (e.g. ``"http://example.com"``) and quote
    characters inside comments are left alone, and backslash-escaped quotes do
    not terminate a string early.

    Known limitations: regular-expression literals and nested template
    literals inside ``${...}`` are not parsed specially.

    Args:
        script: JavaScript code to clean.

    Returns:
        str: Cleaned script with comments and strings removed.
    """

    def _blank_out(match: 're.Match[str]') -> str:
        token = match.group(0)
        # Both comment forms start with '/'; every other alternative starts
        # with its own quote character.
        if token.startswith('/'):
            return ''
        return token[0] * 2

    return _JS_COMMENT_OR_STRING.sub(_blank_out, script)
| 100 | + |
| 101 | + |
def is_script_already_function(script: str) -> bool:
    """
    Tell whether a JavaScript snippet starts with an anonymous function.

    The script is first stripped of comments and string contents, then of
    surrounding whitespace. It counts as a function when it begins with either
    ``function (...) {`` or a parenthesized arrow function ``(...) => {``.

    Args:
        script: JavaScript code to analyze.

    Returns:
        bool: True if script is already a function, False otherwise.
    """
    candidate = clean_script_for_analysis(script).strip()

    wrapper_patterns = (
        r'^\s*function\s*\([^)]*\)\s*\{',  # function (...) {
        r'^\s*\([^)]*\)\s*=>\s*\{',  # (...) => {
    )
    return any(re.match(pattern, candidate) is not None for pattern in wrapper_patterns)
| 121 | + |
| 122 | + |
# Tokens relevant to scope tracking: words (identifiers, keywords, numbers),
# the arrow '=>', braces and parentheses, and any other single non-space
# character (kept so that "previous token" really means "immediately before").
_JS_SCOPE_TOKEN = re.compile(r'[\w$]+|=>|[{}()]|\S')

# Keywords whose parenthesized head is followed by a plain block, not by a
# function body: `if (...) {`, `for await (...) {`, `catch (e) {`, ...
_JS_BLOCK_HEAD_KEYWORDS = frozenset({'if', 'for', 'while', 'switch', 'catch', 'with', 'await'})


def has_return_outside_function(script: str) -> bool:
    """
    Check if a JavaScript script has return statements outside of functions.

    The script is cleaned of comments and string contents and then scanned
    token by token. Every opening brace is classified as either a function
    body or a plain block:

    * ``=> {`` opens an arrow-function body;
    * ``) {`` opens a function body unless the parentheses belong to a control
      statement (``if``, ``for``, ``while``, ``switch``, ``catch``, ``with``).
      This covers ``function (...) {``, ``function name(...) {`` and method
      shorthand ``name(...) {``.

    A ``return`` is "outside a function" when no enclosing brace is a function
    body. Tracking scope per brace (rather than per line) means one-line
    functions are not reported, and a top-level ``return`` is still found when
    an arrow function or ``function`` keyword appears earlier in the same
    block.

    Known limitations: this is a heuristic, not a JavaScript parser. Regular
    expression literals containing braces or parentheses, and ``return`` used
    as a property name, can mislead it.

    Args:
        script: JavaScript code to analyze.

    Returns:
        bool: True if script has return outside function, False otherwise.
    """
    cleaned_script = clean_script_for_analysis(script)

    # If already a function, no need to check
    if is_script_already_function(cleaned_script):
        return False

    open_parens: list[bool] = []  # per open '(': True if it follows a control keyword
    open_braces: list[bool] = []  # per open '{': True if it is a function body
    function_depth = 0  # number of enclosing function bodies
    last_paren_is_control = False  # classification of the most recently closed ')'
    previous_token = ''

    for token_match in _JS_SCOPE_TOKEN.finditer(cleaned_script):
        token = token_match.group(0)

        if token == 'return':
            if function_depth == 0:
                return True
        elif token == '(':
            open_parens.append(previous_token in _JS_BLOCK_HEAD_KEYWORDS)
        elif token == ')':
            # An unmatched ')' is treated as a control head, so malformed input
            # errs towards reporting a top-level return.
            last_paren_is_control = open_parens.pop() if open_parens else True
        elif token == '{':
            is_function_body = previous_token == '=>' or (
                previous_token == ')' and not last_paren_is_control
            )
            open_braces.append(is_function_body)
            if is_function_body:
                function_depth += 1
        elif token == '}':
            if open_braces and open_braces.pop():
                function_depth -= 1

        previous_token = token

    return False
0 commit comments