@@ -64,13 +64,22 @@ def __init__(self, crawler: Crawler) -> None:
6464 crawler .signals .connect (self ._engine_started , signals .engine_started )
6565 self .stats = crawler .stats
6666
67- self .browser_launch_lock = asyncio .Lock ()
68- self .context_launch_lock = asyncio .Lock ()
67+ # browser
6968 self .browser_type_name = settings .get ("PLAYWRIGHT_BROWSER_TYPE" ) or DEFAULT_BROWSER_TYPE
69+ self .browser_launch_lock = asyncio .Lock ()
70+ self .launch_options : dict = settings .getdict ("PLAYWRIGHT_LAUNCH_OPTIONS" ) or {}
71+
72+ # contexts
7073 self .max_pages_per_context : int = settings .getint (
7174 "PLAYWRIGHT_MAX_PAGES_PER_CONTEXT"
7275 ) or settings .getint ("CONCURRENT_REQUESTS" )
73- self .launch_options : dict = settings .getdict ("PLAYWRIGHT_LAUNCH_OPTIONS" ) or {}
76+ self .context_launch_lock = asyncio .Lock ()
77+ self .contexts : Dict [str , BrowserContextWrapper ] = {}
78+ self .context_kwargs : dict = settings .getdict ("PLAYWRIGHT_CONTEXTS" )
79+ if settings .getint ("PLAYWRIGHT_MAX_CONTEXTS" ):
80+ self .context_semaphore = asyncio .Semaphore (
81+ value = settings .getint ("PLAYWRIGHT_MAX_CONTEXTS" )
82+ )
7483
7584 self .default_navigation_timeout : Optional [float ] = None
7685 if "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" in settings :
@@ -79,7 +88,7 @@ def __init__(self, crawler: Crawler) -> None:
7988 settings .get ("PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" )
8089 )
8190
82- # header-related settings
91+ # headers
8392 if "PLAYWRIGHT_PROCESS_REQUEST_HEADERS" in settings :
8493 if settings ["PLAYWRIGHT_PROCESS_REQUEST_HEADERS" ] is None :
8594 self .process_request_headers = None
@@ -99,10 +108,6 @@ def __init__(self, crawler: Crawler) -> None:
99108 else :
100109 self .process_request_headers = use_scrapy_headers
101110
102- # context-related settings
103- self .contexts : Dict [str , BrowserContextWrapper ] = {}
104- self .context_kwargs : dict = settings .getdict ("PLAYWRIGHT_CONTEXTS" )
105-
106111 self .abort_request : Optional [Callable [[PlaywrightRequest ], Union [Awaitable , bool ]]] = None
107112 if settings .get ("PLAYWRIGHT_ABORT_REQUEST" ):
108113 self .abort_request = load_object (settings ["PLAYWRIGHT_ABORT_REQUEST" ])
@@ -123,13 +128,13 @@ async def _launch(self) -> None:
123128 self .browser_type : BrowserType = getattr (self .playwright , self .browser_type_name )
124129 if self .context_kwargs :
125130 logger .info (f"Launching { len (self .context_kwargs )} startup context(s)" )
126- contexts = await asyncio .gather (
131+ await asyncio .gather (
127132 * [
128133 self ._create_browser_context (name = name , context_kwargs = kwargs )
129134 for name , kwargs in self .context_kwargs .items ()
130135 ]
131136 )
132- self .contexts = dict ( zip ( self . context_kwargs . keys (), contexts ) )
137+ self ._set_max_concurrent_context_count ( )
133138 logger .info ("Startup context(s) launched" )
134139 self .stats .set_value ("playwright/page_count" , self ._get_total_page_count ())
135140
@@ -144,6 +149,8 @@ async def _create_browser_context(
144149 self , name : str , context_kwargs : Optional [dict ]
145150 ) -> BrowserContextWrapper :
146151 """Create a new context, also launching a browser if necessary."""
152+ if hasattr (self , "context_semaphore" ):
153+ await self .context_semaphore .acquire ()
147154 context_kwargs = context_kwargs or {}
148155 if context_kwargs .get (PERSISTENT_CONTEXT_PATH_KEY ):
149156 context = await self .browser_type .launch_persistent_context (** context_kwargs )
@@ -159,11 +166,13 @@ async def _create_browser_context(
159166 self .stats .inc_value ("playwright/context_count" )
160167 if self .default_navigation_timeout is not None :
161168 context .set_default_navigation_timeout (self .default_navigation_timeout )
162- return BrowserContextWrapper (
169+ self . contexts [ name ] = BrowserContextWrapper (
163170 context = context ,
164171 semaphore = asyncio .Semaphore (value = self .max_pages_per_context ),
165172 persistent = persistent ,
166173 )
174+ self ._set_max_concurrent_context_count ()
175+ return self .contexts [name ]
167176
168177 async def _create_page (self , request : Request ) -> Page :
169178 """Create a new page in a context, also creating a new context if necessary."""
@@ -173,7 +182,7 @@ async def _create_page(self, request: Request) -> Page:
173182 async with self .context_launch_lock :
174183 context = self .contexts .get (context_name )
175184 if context is None :
176- context = self . contexts [ context_name ] = await self ._create_browser_context (
185+ context = await self ._create_browser_context (
177186 name = context_name , context_kwargs = request .meta .get ("playwright_context_kwargs" )
178187 )
179188
@@ -208,6 +217,11 @@ def _set_max_concurrent_page_count(self):
208217 if current_max_count is None or count > current_max_count :
209218 self .stats .set_value ("playwright/page_count/max_concurrent" , count )
210219
def _set_max_concurrent_context_count(self):
    """Record a new peak for the number of simultaneously tracked contexts.

    Reads the ``playwright/context_count/max_concurrent`` stat and overwrites
    it with the current size of ``self.contexts`` when the stat has not been
    set yet or is smaller than that size. Otherwise the stat is left as is.
    """
    stat_key = "playwright/context_count/max_concurrent"
    recorded_peak = self.stats.get_value(stat_key)
    # An unset stat (None) always counts as exceeded.
    is_new_peak = recorded_peak is None or len(self.contexts) > recorded_peak
    if is_new_peak:
        self.stats.set_value(stat_key, len(self.contexts))
224+
211225 @inlineCallbacks
212226 def close (self ) -> Deferred :
213227 logger .info ("Closing download handler" )
@@ -355,9 +369,10 @@ def close_page_callback() -> None:
355369
def _make_close_browser_context_callback(self, name: str, persistent: bool) -> Callable:
    """Build the zero-argument handler to run when context ``name`` closes.

    When called, the returned function:

    1. removes ``name`` from ``self.contexts`` (a missing entry is ignored),
    2. releases ``self.context_semaphore`` if the handler has that attribute,
    3. logs the closure, including the ``persistent`` flag, at debug level.
    """

    def close_browser_context_callback() -> None:
        # Passing a default to pop() means an already-removed name does not raise.
        self.contexts.pop(name, None)
        # Only release when the handler actually has a context semaphore.
        if hasattr(self, "context_semaphore"):
            self.context_semaphore.release()
        message = f"Browser context closed: '{name}' (persistent={persistent})"
        logger.debug(message)

    return close_browser_context_callback
363378
0 commit comments