     Page,
     PlaywrightContextManager,
     Request as PlaywrightRequest,
+    Response as PlaywrightResponse,
     Route,
 )
 from scrapy import Spider, signals

 logger = logging.getLogger("scrapy-playwright")
 
 
+def _make_request_logger(context_name: str) -> Callable:
+    def _log_request(request: PlaywrightRequest) -> None:
+        logger.debug(
+            f"[Context={context_name}] Request: <{request.method.upper()} {request.url}> "
+            f"(resource type: {request.resource_type}, referrer: {request.headers.get('referer')})"
+        )
+
+    return _log_request
+
+
+def _make_response_logger(context_name: str) -> Callable:
+    def _log_response(response: PlaywrightResponse) -> None:
+        logger.debug(
+            f"[Context={context_name}] Response: <{response.status} {response.url}> "
+            f"(referrer: {response.headers.get('referer')})"
+        )
+
+    return _log_response
+
+
 class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
     def __init__(self, crawler: Crawler) -> None:
         super().__init__(settings=crawler.settings, crawler=crawler)
@@ -107,6 +128,8 @@ async def _create_page(self, request: Request) -> Page:
         context = await self._create_browser_context(context_name, context_kwargs)
         self.contexts[context_name] = context
         page = await context.new_page()
+        page.on("request", _make_request_logger(context_name))
+        page.on("response", _make_response_logger(context_name))
         self.stats.inc_value("playwright/page_count")
         if self.default_navigation_timeout:
             page.set_default_navigation_timeout(self.default_navigation_timeout)
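
For readers unfamiliar with Playwright's event API, here is a minimal standalone sketch (not part of this patch) of the same factory-closure pattern the new `_make_request_logger` / `_make_response_logger` helpers rely on: `page.on("request", ...)` and `page.on("response", ...)` fire for every request the page issues, including subresources, and the closure over the context name lets each debug line be traced back to its browser context. The URL, logger name, and logging configuration below are illustrative assumptions.

```python
import asyncio
import logging

from playwright.async_api import async_playwright, Request, Response

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("playwright-events")  # illustrative logger name


def make_request_logger(context_name: str):
    # Factory pattern as in the diff: capture the context name in a closure
    # so each page's listener tags its log lines.
    def log_request(request: Request) -> None:
        logger.debug(
            "[Context=%s] Request: <%s %s> (resource type: %s)",
            context_name, request.method.upper(), request.url, request.resource_type,
        )

    return log_request


def make_response_logger(context_name: str):
    def log_response(response: Response) -> None:
        logger.debug(
            "[Context=%s] Response: <%s %s>", context_name, response.status, response.url
        )

    return log_response


async def main() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        # Register listeners before navigating so every request/response is logged.
        page.on("request", make_request_logger("default"))
        page.on("response", make_response_logger("default"))
        await page.goto("https://example.org")  # illustrative URL
        await browser.close()


asyncio.run(main())
```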