4141logger = logging .getLogger ("scrapy-playwright" )
4242
4343
def _make_request_logger(context_name: str) -> Callable:
    """Create a debug-logging callback for requests.

    The returned function closes over ``context_name`` and, for each request
    object it is given, writes a single DEBUG record through the module-level
    ``logger``. The record includes the context name, the upper-cased request
    method, the URL, the resource type and the value of the ``referer``
    request header.
    """

    def _debug_log_request(request: PlaywrightRequest) -> None:
        # Assemble the message first, then hand it to the logger in one call.
        message = (
            f"[Context={ context_name } ] Request: <{ request .method .upper ()} { request .url } > "
            f"(resource type: { request .resource_type } , referrer: { request .headers .get ('referer' )} )"
        )
        logger.debug(message)

    return _debug_log_request
52-
53-
def _make_response_logger(context_name: str) -> Callable:
    """Create a debug-logging callback for responses.

    The returned function closes over ``context_name`` and, for each response
    object it is given, writes a single DEBUG record through the module-level
    ``logger``. The record includes the context name, the response status,
    the URL and the value of the ``referer`` header read from the response
    headers.
    """

    def _debug_log_response(response: PlaywrightResponse) -> None:
        # Assemble the message first, then hand it to the logger in one call.
        message = (
            f"[Context={ context_name } ] Response: <{ response .status } { response .url } > "
            f"(referrer: { response .headers .get ('referer' )} )"
        )
        logger.debug(message)

    return _debug_log_response
62-
63-
6444class ScrapyPlaywrightDownloadHandler (HTTPDownloadHandler ):
6545 def __init__ (self , crawler : Crawler ) -> None :
6646 super ().__init__ (settings = crawler .settings , crawler = crawler )
@@ -289,9 +269,7 @@ async def _apply_page_methods(self, page: Page, request: Request) -> None:
289269 except AttributeError :
290270 logger .warning (f"Ignoring { repr (pm )} : could not find method" )
291271 else :
292- pm .result = method (* pm .args , ** pm .kwargs )
293- if isinstance (pm .result , Awaitable ):
294- pm .result = await pm .result
272+ pm .result = await _await_if_necessary (method (* pm .args , ** pm .kwargs ))
295273 await page .wait_for_load_state (timeout = self .default_navigation_timeout )
296274 else :
297275 logger .warning (f"Ignoring { repr (pm )} : expected PageMethod, got { repr (type (pm ))} " )
@@ -333,16 +311,14 @@ def _make_request_handler(
333311 async def _request_handler (route : Route , playwright_request : PlaywrightRequest ) -> None :
334312 """Override request headers, method and body."""
335313 if self .abort_request :
336- should_abort = self .abort_request (playwright_request )
337- if isinstance (should_abort , Awaitable ):
338- should_abort = await should_abort
314+ should_abort = await _await_if_necessary (self .abort_request (playwright_request ))
339315 if should_abort :
340316 await route .abort ()
341317 self .stats .inc_value ("playwright/request_count/aborted" )
342318 return None
343319
344- processed_headers = await self . process_request_headers (
345- self .browser_type , playwright_request , scrapy_headers
320+ processed_headers = await _await_if_necessary (
321+ self .process_request_headers ( self . browser_type , playwright_request , scrapy_headers )
346322 )
347323
348324 # the request that reaches the callback should contain the headers that were sent
@@ -368,6 +344,32 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
368344 return _request_handler
369345
370346
async def _await_if_necessary(obj):
    """Return the final value of ``obj``, awaiting it when required.

    If ``obj`` is an ``Awaitable`` it is awaited and the awaited result is
    returned; any other object is returned unchanged. This lets callers
    treat the results of synchronous and asynchronous callables uniformly
    with a single ``await``.
    """
    return (await obj) if isinstance(obj, Awaitable) else obj
351+
352+
def _make_request_logger(context_name: str) -> Callable:
    """Create a debug-logging callback for requests.

    The returned function closes over ``context_name`` and, for each request
    object it is given, writes a single DEBUG record through the module-level
    ``logger``. The record includes the context name, the upper-cased request
    method, the URL, the resource type and the value of the ``referer``
    request header.
    """

    def _debug_log_request(request: PlaywrightRequest) -> None:
        # Assemble the message first, then hand it to the logger in one call.
        message = (
            f"[Context={ context_name } ] Request: <{ request .method .upper ()} { request .url } > "
            f"(resource type: { request .resource_type } , referrer: { request .headers .get ('referer' )} )"
        )
        logger.debug(message)

    return _debug_log_request
361+
362+
def _make_response_logger(context_name: str) -> Callable:
    """Create a debug-logging callback for responses.

    The returned function closes over ``context_name`` and, for each response
    object it is given, writes a single DEBUG record through the module-level
    ``logger``. The record includes the context name, the response status,
    the URL and the value of the ``referer`` header read from the response
    headers.
    """

    def _debug_log_response(response: PlaywrightResponse) -> None:
        # Assemble the message first, then hand it to the logger in one call.
        message = (
            f"[Context={ context_name } ] Response: <{ response .status } { response .url } > "
            f"(referrer: { response .headers .get ('referer' )} )"
        )
        logger.debug(message)

    return _debug_log_response
371+
372+
371373def _possible_encodings (headers : Headers , text : str ) -> Generator [str , None , None ]:
372374 if headers .get ("content-type" ):
373375 content_type = to_unicode (headers ["content-type" ])
0 commit comments