Skip to content

Commit bd2ae24

Browse files
authored
Fix request method override (#144)
1 parent d95ff08 commit bd2ae24

File tree

3 files changed, with 26 additions (+26) and 16 deletions (-16)

3 files changed, with 26 additions (+26) and 16 deletions (-16)

pylintrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ disable=
77
missing-function-docstring,
88
missing-module-docstring,
99
too-few-public-methods,
10+
too-many-arguments,
1011
too-many-instance-attributes,
1112
too-many-locals,
1213
unused-argument,

scrapy_playwright/handler.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,8 @@ async def _download_request(self, request: Request, spider: Spider) -> Response:
269269
"**",
270270
self._make_request_handler(
271271
method=request.method,
272-
scrapy_headers=request.headers,
272+
url=request.url,
273+
headers=request.headers,
273274
body=request.body,
274275
encoding=request.encoding,
275276
),
@@ -399,7 +400,12 @@ def close_browser_context_callback() -> None:
399400
return close_browser_context_callback
400401

401402
def _make_request_handler(
402-
self, method: str, scrapy_headers: Headers, body: Optional[bytes], encoding: str = "utf8"
403+
self,
404+
method: str,
405+
url: str,
406+
headers: Headers,
407+
body: Optional[bytes],
408+
encoding: str = "utf8",
403409
) -> Callable:
404410
async def _request_handler(route: Route, playwright_request: PlaywrightRequest) -> None:
405411
"""Override request headers, method and body."""
@@ -417,17 +423,18 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
417423
else:
418424
overrides["headers"] = final_headers = await _maybe_await(
419425
self.process_request_headers(
420-
self.browser_type_name, playwright_request, scrapy_headers
426+
self.browser_type_name, playwright_request, headers
421427
)
422428
)
423429
# the request that reaches the callback should contain the final headers
424-
scrapy_headers.clear()
425-
scrapy_headers.update(final_headers)
430+
headers.clear()
431+
headers.update(final_headers)
426432
del final_headers
427433

428-
if playwright_request.is_navigation_request():
434+
# if the request is triggered by scrapy, not playwright
435+
if playwright_request.url == url:
429436
overrides["method"] = method
430-
if body is not None:
437+
if body:
431438
overrides["post_data"] = body.decode(encoding)
432439

433440
try:
@@ -469,13 +476,14 @@ async def _log_request(request: PlaywrightRequest) -> None:
469476
def _make_response_logger(context_name: str) -> Callable:
470477
async def _log_request(response: PlaywrightResponse) -> None:
471478
referrer = await response.header_value("referer")
472-
logger.debug(
473-
"[Context=%s] Response: <%i %s> (referrer: %s)",
474-
context_name,
475-
response.status,
476-
response.url,
477-
referrer,
478-
)
479+
log_args = [context_name, response.status, response.url, referrer]
480+
if 300 <= response.status < 400:
481+
location = await response.header_value("location")
482+
log_args.append(location)
483+
msg = "[Context=%s] Response: <%i %s> (referrer: %s, location: %s)"
484+
else:
485+
msg = "[Context=%s] Response: <%i %s> (referrer: %s)"
486+
logger.debug(msg, *log_args)
479487

480488
return _log_request
481489

tests/test_playwright_requests.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,11 @@ async def test_route_continue_exception(self, logger):
125125
from unittest.mock import AsyncMock
126126

127127
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
128-
req_handler = handler._make_request_handler("GET", Headers({}), body=None)
128+
example_url = "https//example.org"
129+
req_handler = handler._make_request_handler("GET", example_url, Headers({}), body=None)
129130
route = MagicMock()
130131
playwright_request = AsyncMock()
131-
playwright_request.url = "https//example.org"
132+
playwright_request.url = example_url
132133
playwright_request.is_navigation_request = MagicMock(return_value=True)
133134
playwright_request.all_headers.return_value = {}
134135

0 commit comments

Comments
 (0)