|
| 1 | +import json |
| 2 | +import platform |
| 3 | +import sys |
| 4 | +import warnings |
| 5 | + |
| 6 | +import pytest |
| 7 | +from scrapy import Spider, Request |
| 8 | +from scrapy.http.headers import Headers |
| 9 | + |
| 10 | +from tests import make_handler |
| 11 | +from tests.mockserver import MockServer |
| 12 | + |
| 13 | +from scrapy_playwright.headers import use_playwright_headers |
| 14 | + |
| 15 | + |
@pytest.mark.skipif(sys.version_info < (3, 8), reason="AsyncMock was added on Python 3.8")
@pytest.mark.asyncio
async def test_use_playwright_headers_deprecated():
    """Check the return value and deprecation message of ``use_playwright_headers``.

    The Playwright request is an ``AsyncMock`` whose ``all_headers`` coroutine
    returns a fixed dict. The test asserts that the function returns that same
    dict and that the expected deprecation message was emitted.
    """
    # Imported lazily: AsyncMock is not available before Python 3.8 (see skipif).
    from unittest.mock import AsyncMock

    expected_message = (
        "The 'scrapy_playwright.headers.use_playwright_headers' function is"
        " deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
        " instead."
    )
    headers = {"foo": "bar", "a": "b"}
    playwright_request = AsyncMock()
    playwright_request.all_headers.return_value = headers

    with warnings.catch_warnings(record=True) as warning_list:
        # Record every warning regardless of the filters active in the
        # surrounding test session; otherwise the list may stay empty.
        warnings.simplefilter("always")
        processed_headers = await use_playwright_headers(
            "foobar", playwright_request, Headers({})
        )

    assert processed_headers == headers
    # Search all recorded warnings instead of indexing [0]: an unrelated
    # warning emitted first must not break (or IndexError) this check.
    assert expected_message in [str(warning.message) for warning in warning_list]
| 32 | + |
| 33 | + |
class MixinProcessHeadersTestCase:
    """Request-header processing tests shared by all browser types.

    Concrete subclasses must define ``browser_type``. It is used as the
    ``PLAYWRIGHT_BROWSER_TYPE`` setting and also as the ``user_agent`` of the
    default context, so assertions can tell a context-provided user agent
    apart from one set on the Scrapy request.
    """

    @staticmethod
    async def _download_headers(handler, request) -> dict:
        """Download ``request`` and return the JSON object found in the response body.

        The JSON is read from the response's ``<pre>`` text (presumably the
        request headers echoed back by the mock server's ``/headers``
        endpoint -- confirm against ``tests.mockserver``). Keys are
        lowercased so lookups are case-insensitive.
        """
        resp = await handler._download_request(request, Spider("foo"))
        parsed = json.loads(resp.css("pre::text").get())
        return {key.lower(): value for key, value in parsed.items()}

    @pytest.mark.asyncio
    async def test_user_agent(self):
        """The context user agent is used unless the request sets its own."""
        settings_dict = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {"default": {"user_agent": self.browser_type}},
            "USER_AGENT": None,
        }
        async with make_handler(settings_dict) as handler:
            with MockServer() as server:
                # if Scrapy's user agent is None, use the one from the Browser
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                )
                headers = await self._download_headers(handler, req)
                assert headers["user-agent"] == self.browser_type

                # if Scrapy's user agent is set to some value, use it
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                    headers={"User-Agent": "foobar"},
                )
                headers = await self._download_headers(handler, req)
                assert headers["user-agent"] == "foobar"

    @pytest.mark.asyncio
    async def test_use_playwright_headers(self):
        """Ignore Scrapy headers"""
        settings_dict = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {"default": {"user_agent": self.browser_type}},
            "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": None,
            "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 2000,
        }
        async with make_handler(settings_dict) as handler:
            with MockServer() as server:
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                    headers={"User-Agent": "foobar", "Asdf": "qwerty"},
                )
                headers = await self._download_headers(handler, req)
                assert headers["user-agent"] == self.browser_type
                assert "asdf" not in headers

    @pytest.mark.asyncio
    async def test_use_playwright_headers_deprecated(self):
        """Ignore Scrapy headers"""
        expected_message = (
            "The 'scrapy_playwright.headers.use_playwright_headers' function is"
            " deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
            " instead."
        )
        settings_dict = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {"default": {"user_agent": self.browser_type}},
            "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": use_playwright_headers,
            "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 2000,
        }
        with warnings.catch_warnings(record=True) as warning_list:
            # Record every warning regardless of the filters active in the
            # surrounding test session; otherwise the list may stay empty.
            warnings.simplefilter("always")
            async with make_handler(settings_dict) as handler:
                with MockServer() as server:
                    req = Request(
                        url=server.urljoin("/headers"),
                        meta={"playwright": True},
                        headers={"User-Agent": "foobar", "Asdf": "qwerty"},
                    )
                    headers = await self._download_headers(handler, req)
                    assert headers["user-agent"] == self.browser_type
                    assert "asdf" not in headers

        # Search all recorded warnings instead of indexing [0]: the handler or
        # its dependencies may emit unrelated warnings before the deprecation.
        assert expected_message in [str(warning.message) for warning in warning_list]

    @pytest.mark.asyncio
    async def test_use_custom_headers(self):
        """Custom header processing function"""

        async def important_headers(*args, **kwargs) -> dict:
            return {"foo": "bar"}

        settings_dict = {
            "PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
            "PLAYWRIGHT_CONTEXTS": {"default": {"user_agent": self.browser_type}},
            "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": important_headers,
        }
        async with make_handler(settings_dict) as handler:
            with MockServer() as server:
                req = Request(
                    url=server.urljoin("/headers"),
                    meta={"playwright": True},
                    headers={"User-Agent": "foobar", "Asdf": "qwerty"},
                )
                headers = await self._download_headers(handler, req)
                assert headers["foo"] == "bar"
                # Neither the context user agent nor the request's one is sent:
                # the custom function's return value replaced both.
                assert headers.get("user-agent") not in (self.browser_type, "foobar")
                assert "asdf" not in headers
| 141 | + |
| 142 | + |
class TestProcessHeadersChromium(MixinProcessHeadersTestCase):
    """Run the shared header-processing tests with ``browser_type`` set to Chromium."""

    browser_type = "chromium"
| 145 | + |
| 146 | + |
class TestProcessHeadersFirefox(MixinProcessHeadersTestCase):
    """Run the shared header-processing tests with ``browser_type`` set to Firefox."""

    browser_type = "firefox"
| 149 | + |
| 150 | + |
@pytest.mark.skipif(platform.system() != "Darwin", reason="Test WebKit only on Darwin")
class TestProcessHeadersWebkit(MixinProcessHeadersTestCase):
    """Run the shared header-processing tests with ``browser_type`` set to WebKit.

    Skipped unless ``platform.system()`` reports ``"Darwin"``.
    """

    browser_type = "webkit"
0 commit comments