@@ -402,8 +402,6 @@ async def send_cdp(self, method: str, params: Optional[dict] = None) -> dict:
402402 self ._stagehand .logger .debug (
403403 f"CDP command '{ method } ' failed: { e } . Attempting to reconnect..."
404404 )
405- # Try to reconnect
406- await self ._ensure_cdp_session ()
407405 # Handle specific errors if needed (e.g., session closed)
408406 if "Target closed" in str (e ) or "Session closed" in str (e ):
409407 # Attempt to reset the client if the session closed unexpectedly
@@ -446,70 +444,212 @@ async def _wait_for_settled_dom(self, timeout_ms: int = None):
446444 """
447445 Wait for the DOM to settle (stop changing) before proceeding.
448446
447+ **Definition of "settled"**
448+ • No in-flight network requests (except WebSocket / Server-Sent-Events).
449+ • That idle state lasts for at least **500 ms** (the "quiet-window").
450+
451+ **How it works**
452+ 1. Subscribes to CDP Network and Page events for the main target and all
453+ out-of-process iframes (via `Target.setAutoAttach { flatten:true }`).
454+ 2. Every time `Network.requestWillBeSent` fires, the request ID is added
455+ to an **`inflight`** set.
456+ 3. When the request finishes—`loadingFinished`, `loadingFailed`,
457+ `requestServedFromCache`, or a *data:* response—the request ID is
458+ removed.
459+ 4. *Document* requests are also mapped **frameId → requestId**; when
460+ `Page.frameStoppedLoading` fires the corresponding Document request is
461+ removed immediately (covers iframes whose network events never close).
462+ 5. A **stalled-request sweep task** runs every 500 ms. Any tracked request
463+ that has been open for more than 2 s is forcibly removed; this prevents
464+ stalled requests (e.g. ad/analytics iframes) from blocking the wait forever.
465+ 6. When `inflight` becomes empty the helper starts a 500 ms timer.
466+ If no new request appears before the timer fires, the internal done
467+ event is set → **DOM is considered settled**.
468+ 7. A global guard (`timeout_ms` or the stagehand client's
469+ `dom_settle_timeout_ms`, default 30 s) ensures the wait always ends; if it
470+ fires while requests are still outstanding, their count is logged.
471+
449472 Args:
450473 timeout_ms (int, optional): Maximum time to wait in milliseconds.
451474 If None, uses the stagehand client's dom_settle_timeout_ms.
452475 """
453- try :
454- timeout = timeout_ms or getattr (
455- self ._stagehand , "dom_settle_timeout_ms" , 30000
456- )
457- import asyncio
458-
459- # Wait for domcontentloaded first
460- await self ._page .wait_for_load_state ("domcontentloaded" )
461-
462- # Create a timeout promise that resolves after the specified time
463- timeout_task = asyncio .create_task (asyncio .sleep (timeout / 1000 ))
476+ import asyncio
477+ import time
464478
465- # Try to check if the DOM has settled
466- try :
467- # Create a task for evaluating the DOM settling
468- eval_task = asyncio .create_task (
469- self ._page .evaluate (
470- """
471- () => {
472- return new Promise((resolve) => {
473- if (typeof window.waitForDomSettle === 'function') {
474- window.waitForDomSettle().then(resolve);
475- } else {
476- console.warn('waitForDomSettle is not defined, considering DOM as settled');
477- resolve();
478- }
479- });
480- }
481- """
482- )
483- )
484-
485- # Create tasks for other ways to determine page readiness
486- dom_task = asyncio .create_task (
487- self ._page .wait_for_load_state ("domcontentloaded" )
488- )
489- body_task = asyncio .create_task (self ._page .wait_for_selector ("body" ))
479+ timeout = timeout_ms or getattr (self ._stagehand , "dom_settle_timeout_ms" , 30000 )
480+ client = await self .get_cdp_client ()
490481
491- # Wait for the first task to complete
492- done , pending = await asyncio . wait (
493- [ eval_task , dom_task , body_task , timeout_task ],
494- return_when = asyncio . FIRST_COMPLETED ,
495- )
482+ # Check if document exists
483+ try :
484+ await self . _page . title ()
485+ except Exception :
486+ await self . _page . wait_for_load_state ( "domcontentloaded" )
496487
497- # Cancel any pending tasks
498- for task in pending :
499- task .cancel ()
488+ # Enable CDP domains
489+ await client .send ("Network.enable" )
490+ await client .send ("Page.enable" )
491+ await client .send (
492+ "Target.setAutoAttach" ,
493+ {
494+ "autoAttach" : True ,
495+ "waitForDebuggerOnStart" : False ,
496+ "flatten" : True ,
497+ "filter" : [
498+ {"type" : "worker" , "exclude" : True },
499+ {"type" : "shared_worker" , "exclude" : True },
500+ ],
501+ },
502+ )
500503
501- # If the timeout was hit, log a warning
502- if timeout_task in done :
504+ # Set up tracking structures
505+ inflight = set () # Set of request IDs
506+ meta = {} # Dict of request ID -> {"url": str, "start": float}
507+ doc_by_frame = {} # Dict of frame ID -> request ID
508+
509+ # Event tracking
510+ quiet_timer = None
511+ stalled_request_sweep_task = None
512+ loop = asyncio .get_event_loop ()
513+ done_event = asyncio .Event ()
514+
515+ def clear_quiet ():
516+ nonlocal quiet_timer
517+ if quiet_timer :
518+ quiet_timer .cancel ()
519+ quiet_timer = None
520+
521+ def resolve_done ():
522+ """Cleanup and mark as done"""
523+ clear_quiet ()
524+ if stalled_request_sweep_task and not stalled_request_sweep_task .done ():
525+ stalled_request_sweep_task .cancel ()
526+ done_event .set ()
527+
528+ def maybe_quiet ():
529+ """Start quiet timer if no requests are in flight"""
530+ nonlocal quiet_timer
531+ if len (inflight ) == 0 and not quiet_timer :
532+ quiet_timer = loop .call_later (0.5 , resolve_done )
533+
534+ def finish_req (request_id : str ):
535+ """Mark a request as finished"""
536+ if request_id not in inflight :
537+ return
538+ inflight .remove (request_id )
539+ meta .pop (request_id , None )
540+ # Remove from frame mapping
541+ for fid , rid in list (doc_by_frame .items ()):
542+ if rid == request_id :
543+ doc_by_frame .pop (fid )
544+ clear_quiet ()
545+ maybe_quiet ()
546+
547+ # Event handlers
548+ def on_request (params ):
549+ """Handle Network.requestWillBeSent"""
550+ if params .get ("type" ) in ["WebSocket" , "EventSource" ]:
551+ return
552+
553+ request_id = params ["requestId" ]
554+ inflight .add (request_id )
555+ meta [request_id ] = {"url" : params ["request" ]["url" ], "start" : time .time ()}
556+
557+ if params .get ("type" ) == "Document" and params .get ("frameId" ):
558+ doc_by_frame [params ["frameId" ]] = request_id
559+
560+ clear_quiet ()
561+
562+ def on_finish (params ):
563+ """Handle Network.loadingFinished"""
564+ finish_req (params ["requestId" ])
565+
566+ def on_failed (params ):
567+ """Handle Network.loadingFailed"""
568+ finish_req (params ["requestId" ])
569+
570+ def on_cached (params ):
571+ """Handle Network.requestServedFromCache"""
572+ finish_req (params ["requestId" ])
573+
574+ def on_data_url (params ):
575+ """Handle Network.responseReceived for data: URLs"""
576+ if params .get ("response" , {}).get ("url" , "" ).startswith ("data:" ):
577+ finish_req (params ["requestId" ])
578+
579+ def on_frame_stop (params ):
580+ """Handle Page.frameStoppedLoading"""
581+ frame_id = params ["frameId" ]
582+ if frame_id in doc_by_frame :
583+ finish_req (doc_by_frame [frame_id ])
584+
585+ # Register event handlers
586+ client .on ("Network.requestWillBeSent" , on_request )
587+ client .on ("Network.loadingFinished" , on_finish )
588+ client .on ("Network.loadingFailed" , on_failed )
589+ client .on ("Network.requestServedFromCache" , on_cached )
590+ client .on ("Network.responseReceived" , on_data_url )
591+ client .on ("Page.frameStoppedLoading" , on_frame_stop )
592+
593+ async def sweep_stalled_requests ():
594+ """Remove stalled document requests after 2 seconds"""
595+ while not done_event .is_set ():
596+ await asyncio .sleep (0.5 )
597+ now = time .time ()
598+ for request_id , request_meta in list (meta .items ()):
599+ if now - request_meta ["start" ] > 2.0 :
600+ inflight .discard (request_id )
601+ meta .pop (request_id , None )
602+ self ._stagehand .logger .debug (
603+ "⏳ forcing completion of stalled iframe document" ,
604+ extra = {"url" : request_meta ["url" ][:120 ]},
605+ )
606+ maybe_quiet ()
607+
608+ # Start stalled request sweeper
609+ stalled_request_sweep_task = asyncio .create_task (sweep_stalled_requests ())
610+
611+ # Set up timeout guard
612+ async def timeout_guard ():
613+ await asyncio .sleep (timeout / 1000 )
614+ if not done_event .is_set ():
615+ if len (inflight ) > 0 :
503616 self ._stagehand .logger .debug (
504- "DOM settle timeout exceeded, continuing anyway " ,
505- extra = {"timeout_ms " : timeout },
617+ "⚠️ DOM- settle timeout reached – network requests still pending " ,
618+ extra = {"count " : len ( inflight ) },
506619 )
620+ resolve_done ()
507621
508- except Exception as e :
509- self ._stagehand .logger .debug (f"Error waiting for DOM to settle: { e } " )
622+ timeout_task = asyncio .create_task (timeout_guard ())
510623
511- except Exception as e :
512- self ._stagehand .logger .error (f"Error in _wait_for_settled_dom: { e } " )
624+ # Initial check
625+ maybe_quiet ()
626+
627+ try :
628+ # Wait for completion
629+ await done_event .wait ()
630+ finally :
631+ # Cleanup
632+ client .remove_listener ("Network.requestWillBeSent" , on_request )
633+ client .remove_listener ("Network.loadingFinished" , on_finish )
634+ client .remove_listener ("Network.loadingFailed" , on_failed )
635+ client .remove_listener ("Network.requestServedFromCache" , on_cached )
636+ client .remove_listener ("Network.responseReceived" , on_data_url )
637+ client .remove_listener ("Page.frameStoppedLoading" , on_frame_stop )
638+
639+ if quiet_timer :
640+ quiet_timer .cancel ()
641+ if stalled_request_sweep_task and not stalled_request_sweep_task .done ():
642+ stalled_request_sweep_task .cancel ()
643+ try :
644+ await stalled_request_sweep_task
645+ except asyncio .CancelledError :
646+ pass
647+ if timeout_task and not timeout_task .done ():
648+ timeout_task .cancel ()
649+ try :
650+ await timeout_task
651+ except asyncio .CancelledError :
652+ pass
513653
514654 # Forward other Page methods to underlying Playwright page
515655 def __getattr__ (self , name ):
0 commit comments