Merge branch 'main' into andrew/update_last_known_good_chrome

ayjayt · web-flow · commit 75784e019773 · 2025-11-19T14:19:13.000-05:00
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -4,6 +4,8 @@
 - Alter `get_chrome` verbose to print whole JSON
 - Change chrome download path to use XDG cache dir
 - Don't download chrome if we already have that version: add force argument
+- Remove unused system inspection code
+- Add a set of helper functions to wait for tab loading and run JavaScript
 v1.2.1
 - Use custom threadpool for functions that could be running during shutdown:
   Python's stdlib threadpool isn't available during interpreter shutdown, nor
diff --git a/pyproject.toml b/pyproject.toml
@@ -110,8 +110,9 @@ asyncio_default_fixture_loop_scope = "function"
 log_cli = false
 addopts = "--import-mode=append"
 
+# tell poe to use the env we give it, otherwise it detects uv and overrides flags
 [tool.poe]
-executor.type = "virtualenv"
+executor.type = "simple"
 
 [tool.poe.tasks]
 test_proc = "pytest --log-level=1 -W error -n auto -v -rfE --capture=fd tests/test_process.py"
diff --git a/src/choreographer/browser_async.py b/src/choreographer/browser_async.py
@@ -14,7 +14,7 @@
 from choreographer import protocol
 
 from ._brokers import Broker
-from .browsers import BrowserClosedError, BrowserDepsError, BrowserFailedError, Chromium
+from .browsers import BrowserClosedError, BrowserFailedError, Chromium
 from .channels import ChannelClosedError, Pipe
 from .protocol.devtools_async import Session, Target
 from .utils import TmpDirWarning, _manual_thread_pool
@@ -175,11 +175,6 @@ def run() -> subprocess.Popen[bytes] | subprocess.Popen[str]:  # depends on args
                 if counter == MAX_POPULATE_LOOPS:
                     break
         except (BrowserClosedError, BrowserFailedError, asyncio.CancelledError) as e:
-            if (
-                hasattr(self._browser_impl, "missing_libs")
-                and self._browser_impl.missing_libs  # type: ignore[reportAttributeAccessIssue]
-            ):
-                raise BrowserDepsError from e
             raise BrowserFailedError(
                 "The browser seemed to close immediately after starting.",
                 "You can set the `logging.Logger` level lower to see more output.",
diff --git a/src/choreographer/browsers/_errors.py b/src/choreographer/browsers/_errors.py
@@ -6,6 +6,7 @@ class BrowserFailedError(RuntimeError):
     """An error for when the browser fails to launch."""
 
 
+# Currently unused, but kept so that the public API does not break
 class BrowserDepsError(BrowserFailedError):
     """An error for when the browser is closed because of missing libs."""
 
diff --git a/src/choreographer/browsers/chromium.py b/src/choreographer/browsers/chromium.py
@@ -115,46 +115,6 @@ def logger_parser(
 
         return True
 
-    def _libs_ok(self) -> bool:
-        """Return true if libs ok."""
-        if self.skip_local:
-            _logger.debug(
-                "If we HAVE to skip local.",
-            )
-            return True
-        _logger.debug("Checking for libs needed.")
-        if platform.system() != "Linux":
-            _logger.debug("We're not in linux, so no need for check.")
-            return True
-        p = None
-        try:
-            _logger.debug(f"Trying ldd {self.path}")
-            p = subprocess.run(  # noqa: S603, validating run with variables
-                [  # noqa: S607 path is all we have
-                    "ldd",
-                    str(self.path),
-                ],
-                capture_output=True,
-                timeout=5,
-                check=True,
-            )
-        except Exception as e:  # noqa: BLE001
-            msg = "ldd failed."
-            stderr = p.stderr.decode() if p and p.stderr else None
-            # Log failure as INFO rather than WARNING so that it's hidden by default,
-            # since browser may succeed even if ldd fails
-            _logger.info(
-                msg  # noqa: G003 + in log
-                + f" e: {e}, stderr: {stderr}",
-            )
-            return False
-        if b"not found" in p.stdout:
-            msg = "Found deps missing in chrome"
-            _logger.debug2(msg + f" {p.stdout.decode()}")
-            return False
-        _logger.debug("No problems found with dependencies")
-        return True
-
     def __init__(
         self,
         channel: ChannelInterface,
@@ -220,7 +180,6 @@ def pre_open(self) -> None:
             path=self._tmp_dir_path,
             sneak=self._is_isolated,
         )
-        self.missing_libs = not self._libs_ok()
         _logger.info(f"Temporary directory at: {self.tmp_dir.path}")
 
     def is_isolated(self) -> bool:
diff --git a/src/choreographer/protocol/devtools_async_helpers.py b/src/choreographer/protocol/devtools_async_helpers.py
@@ -0,0 +1,135 @@
+"""Async helper functions for common Chrome DevTools Protocol patterns."""
+
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from choreographer import Browser, Tab
+
+    from . import BrowserResponse
+
+
+async def create_and_wait(
+    browser: Browser,
+    url: str = "",
+    *,
+    timeout: float = 30.0,
+) -> Tab:
+    """
+    Create a new tab and wait for it to load.
+
+    Args:
+        browser: Browser instance
+        url: URL to navigate to (default: blank page)
+        timeout: Seconds to wait for page load (default: 30.0)
+
+    Returns:
+        The created Tab
+
+    """
+    tab = await browser.create_tab(url)
+    temp_session = await tab.create_session()
+
+    try:
+        load_future = temp_session.subscribe_once("Page.loadEventFired")
+        await temp_session.send_command("Page.enable")
+        await temp_session.send_command("Runtime.enable")
+
+        if url:
+            try:
+                await asyncio.wait_for(load_future, timeout=timeout)
+            except (asyncio.TimeoutError, asyncio.CancelledError, TimeoutError):
+                # Stop the page load when timeout occurs
+                await temp_session.send_command("Page.stopLoading")
+                raise
+    finally:
+        await tab.close_session(temp_session.session_id)
+
+    return tab
+
+
+async def navigate_and_wait(
+    tab: Tab,
+    url: str,
+    *,
+    timeout: float = 30.0,
+) -> Tab:
+    """
+    Navigate an existing tab to a URL and wait for it to load.
+
+    Args:
+        tab: Tab to navigate
+        url: URL to navigate to
+        timeout: Seconds to wait for page load (default: 30.0)
+
+    Returns:
+        The Tab after navigation completes
+
+    """
+    temp_session = await tab.create_session()
+
+    try:
+        await temp_session.send_command("Page.enable")
+        await temp_session.send_command("Runtime.enable")
+        load_future = temp_session.subscribe_once("Page.loadEventFired")
+        try:
+
+            async def _freezers():
+                # Freezes (never returns) if the navigation does not resolve
+                await temp_session.send_command("Page.navigate", params={"url": url})
+                # Can freeze (never completes) if the navigation resolves badly
+                await load_future
+
+            await asyncio.wait_for(_freezers(), timeout=timeout)
+        except (asyncio.TimeoutError, asyncio.CancelledError, TimeoutError):
+            # Stop the navigation when timeout occurs
+            await temp_session.send_command("Page.stopLoading")
+            raise
+    finally:
+        await tab.close_session(temp_session.session_id)
+
+    return tab
+
+
+async def execute_js_and_wait(
+    tab: Tab,
+    expression: str,
+    *,
+    timeout: float = 30.0,
+) -> BrowserResponse:
+    """
+    Execute JavaScript in a tab and return the result.
+
+    Args:
+        tab: Tab to execute JavaScript in
+        expression: JavaScript expression to evaluate
+        timeout: Seconds to wait for execution (default: 30.0)
+
+    Returns:
+        Response dict from Runtime.evaluate with 'result' and optional
+        'exceptionDetails'
+
+    """
+    temp_session = await tab.create_session()
+
+    try:
+        await temp_session.send_command("Page.enable")
+        await temp_session.send_command("Runtime.enable")
+
+        response = await asyncio.wait_for(
+            temp_session.send_command(
+                "Runtime.evaluate",
+                params={
+                    "expression": expression,
+                    "awaitPromise": True,
+                    "returnByValue": True,
+                },
+            ),
+            timeout=timeout,
+        )
+
+        return response
+    finally:
+        await tab.close_session(temp_session.session_id)
diff --git a/tests/test_devtools_async_helpers.py b/tests/test_devtools_async_helpers.py
@@ -0,0 +1,92 @@
+import asyncio
+
+import logistro
+import pytest
+
+from choreographer.protocol.devtools_async_helpers import (
+    create_and_wait,
+    execute_js_and_wait,
+    navigate_and_wait,
+)
+
+pytestmark = pytest.mark.asyncio(loop_scope="function")
+
+_logger = logistro.getLogger(__name__)
+
+
+# Note: don't use data URLs here. Whether or not they load is variable:
+# it depends on how long Chrome has been open, how the URLs were entered,
+# etc.
+
+
+@pytest.mark.asyncio
+async def test_create_and_wait(browser):
+    """Test create_and_wait with a valid URL, a blank URL, and an unreachable URL."""
+    _logger.info("testing create_and_wait...")
+
+    # Count tabs before
+    initial_tab_count = len(browser.tabs)
+
+    # Use a built-in Chrome page (data URLs are unreliable for create_and_wait)
+    data_url = "chrome://version"
+
+    # Test 1: Create tab with a valid URL - should succeed
+    tab1 = await create_and_wait(browser, url=data_url, timeout=5.0)
+    assert tab1 is not None
+
+    # Verify the page loaded correctly using execute_js_and_wait
+    result = await execute_js_and_wait(tab1, "window.location.href", timeout=5.0)
+    location = result["result"]["result"]["value"]
+    assert location.startswith(data_url)
+
+    # Test 2: Create tab without URL - should succeed (blank page)
+    tab2 = await create_and_wait(browser, url="", timeout=5.0)
+    assert tab2 is not None
+
+    # Verify we have 2 more tabs
+    final_tab_count = len(browser.tabs)
+    assert final_tab_count == initial_tab_count + 2
+
+    # Test 3: Create tab with bad URL that won't load - should timeout
+    with pytest.raises(asyncio.TimeoutError):
+        await create_and_wait(browser, url="http://192.0.2.1:9999", timeout=0.5)
+
+
+@pytest.mark.asyncio
+async def test_navigate_and_wait(browser):
+    """Test navigate_and_wait with both valid data URL and bad URL."""
+    _logger.info("testing navigate_and_wait...")
+    # Create a blank tab first
+    tab = await browser.create_tab("")
+
+    # Navigating to data URLs seems to be fine right now,
+    # but if one day you have an error here,
+    # switch to the strategy used in test_create_and_wait (a chrome:// URL)
+
+    # Create a data URL with identifiable content
+    html_content1 = "<html><body><h1>Navigation Test 1</h1></body></html>"
+    data_url1 = f"data:text/html,{html_content1}"
+
+    html_content2 = "<html><body><h1>Navigation Test 2</h1></body></html>"
+    data_url2 = f"data:text/html,{html_content2}"
+
+    # Test 1: Navigate the tab to a valid data URL - should succeed
+    result_tab1 = await navigate_and_wait(tab, url=data_url1, timeout=5.0)
+    assert result_tab1 is tab
+
+    # Verify the navigation succeeded using execute_js_and_wait
+    result = await execute_js_and_wait(tab, "window.location.href", timeout=5.0)
+    location = result["result"]["result"]["value"]
+    assert location.startswith("data:text/html")
+
+    # Test 2: Navigate the same tab to another valid data URL - should succeed
+    result_tab2 = await navigate_and_wait(tab, url=data_url2, timeout=5.0)
+    assert result_tab2 is tab
+
+    # Verify the navigation succeeded
+    result = await execute_js_and_wait(tab, "window.location.href", timeout=5.0)
+    location = result["result"]["result"]["value"]
+    assert location.startswith("data:text/html")
+    # Test 3: Navigate to bad URL that won't load - should timeout
+    with pytest.raises(asyncio.TimeoutError):
+        await navigate_and_wait(tab, url="http://192.0.2.1:9999", timeout=0.5)