2222from scrapy import Spider , signals
2323from scrapy .core .downloader .handlers .http import HTTPDownloadHandler
2424from scrapy .crawler import Crawler
25+ from scrapy .exceptions import NotSupported
2526from scrapy .http import Request , Response
2627from scrapy .http .headers import Headers
2728from scrapy .responsetypes import responsetypes
@@ -69,6 +70,8 @@ class BrowserContextWrapper:
6970class Config :
7071 cdp_url : Optional [str ]
7172 cdp_kwargs : dict
73+ connect_url : Optional [str ]
74+ connect_kwargs : dict
7275 browser_type_name : str
7376 launch_options : dict
7477 max_pages_per_context : int
@@ -78,9 +81,15 @@ class Config:
7881
7982 @classmethod
8083 def from_settings (cls , settings : Settings ) -> "Config" :
84+ if settings .get ("PLAYWRIGHT_CDP_URL" ) and settings .get ("PLAYWRIGHT_CONNECT_URL" ):
85+ msg = "Setting both PLAYWRIGHT_CDP_URL and PLAYWRIGHT_CONNECT_URL is not supported"
86+ logger .error (msg )
87+ raise NotSupported (msg )
8188 cfg = cls (
8289 cdp_url = settings .get ("PLAYWRIGHT_CDP_URL" ),
8390 cdp_kwargs = settings .getdict ("PLAYWRIGHT_CDP_KWARGS" ) or {},
91+ connect_url = settings .get ("PLAYWRIGHT_CONNECT_URL" ),
92+ connect_kwargs = settings .getdict ("PLAYWRIGHT_CONNECT_KWARGS" ) or {},
8493 browser_type_name = settings .get ("PLAYWRIGHT_BROWSER_TYPE" ) or DEFAULT_BROWSER_TYPE ,
8594 launch_options = settings .getdict ("PLAYWRIGHT_LAUNCH_OPTIONS" ) or {},
8695 max_pages_per_context = settings .getint ("PLAYWRIGHT_MAX_PAGES_PER_CONTEXT" ),
@@ -91,10 +100,11 @@ def from_settings(cls, settings: Settings) -> "Config":
91100 ),
92101 )
93102 cfg .cdp_kwargs .pop ("endpoint_url" , None )
103+ cfg .connect_kwargs .pop ("ws_endpoint" , None )
94104 if not cfg .max_pages_per_context :
95105 cfg .max_pages_per_context = settings .getint ("CONCURRENT_REQUESTS" )
96- if cfg .cdp_url and cfg .launch_options :
97- logger .warning ("PLAYWRIGHT_CDP_URL is set , ignoring PLAYWRIGHT_LAUNCH_OPTIONS" )
106+ if ( cfg .cdp_url or cfg . connect_url ) and cfg .launch_options :
107+ logger .warning ("Connecting to remote browser , ignoring PLAYWRIGHT_LAUNCH_OPTIONS" )
98108 return cfg
99109
100110
@@ -166,7 +176,7 @@ async def _maybe_launch_browser(self) -> None:
166176 self .browser = await self .browser_type .launch (** self .config .launch_options )
167177 logger .info ("Browser %s launched" , self .browser_type .name )
168178
169- async def _maybe_connect_devtools (self ) -> None :
179+ async def _maybe_connect_remote_devtools (self ) -> None :
170180 async with self .browser_launch_lock :
171181 if not hasattr (self , "browser" ):
172182 logger .info ("Connecting using CDP: %s" , self .config .cdp_url )
@@ -175,6 +185,15 @@ async def _maybe_connect_devtools(self) -> None:
175185 )
176186 logger .info ("Connected using CDP: %s" , self .config .cdp_url )
177187
async def _maybe_connect_remote(self) -> None:
    """Connect to a remote Playwright server if no browser is set yet.

    The check and the connection both happen while holding
    ``self.browser_launch_lock``. If ``self.browser`` already exists the
    method returns without doing anything; otherwise it calls
    ``self.browser_type.connect`` with ``self.config.connect_url`` as the
    positional argument and ``self.config.connect_kwargs`` as keyword
    arguments, and stores the result in ``self.browser``.
    """
    async with self.browser_launch_lock:
        if hasattr(self, "browser"):
            # A browser attribute is already present; do not connect again.
            return
        logger.info("Connecting to remote Playwright")
        connect = self.browser_type.connect
        endpoint = self.config.connect_url
        extra_kwargs = self.config.connect_kwargs
        self.browser = await connect(endpoint, **extra_kwargs)
        logger.info("Connected to remote Playwright")
196+
178197 async def _create_browser_context (
179198 self ,
180199 name : str ,
@@ -187,20 +206,21 @@ async def _create_browser_context(
187206 if hasattr (self , "context_semaphore" ):
188207 await self .context_semaphore .acquire ()
189208 context_kwargs = context_kwargs or {}
209+ persistent = remote = False
190210 if context_kwargs .get (PERSISTENT_CONTEXT_PATH_KEY ):
191211 context = await self .browser_type .launch_persistent_context (** context_kwargs )
192212 persistent = True
193- remote = False
194213 elif self .config .cdp_url :
195- await self ._maybe_connect_devtools ()
214+ await self ._maybe_connect_remote_devtools ()
215+ context = await self .browser .new_context (** context_kwargs )
216+ remote = True
217+ elif self .config .connect_url :
218+ await self ._maybe_connect_remote ()
196219 context = await self .browser .new_context (** context_kwargs )
197- persistent = False
198220 remote = True
199221 else :
200222 await self ._maybe_launch_browser ()
201223 context = await self .browser .new_context (** context_kwargs )
202- persistent = False
203- remote = False
204224
205225 context .on (
206226 "close" , self ._make_close_browser_context_callback (name , persistent , remote , spider )
0 commit comments