Skip to content

Commit 414c3d2

Browse files
committed
Add example for the PLAYWRIGHT_PROCESS_REQUEST_HEADERS setting
1 parent 1c5f96e commit 414c3d2

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

examples/headers.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import json
2+
from pathlib import Path
3+
4+
from scrapy import Spider, Request
5+
from scrapy_playwright.page import PageMethod
6+
7+
8+
class HeadersSpider(Spider):
    """Show how PLAYWRIGHT_PROCESS_REQUEST_HEADERS affects outgoing request headers.

    With PLAYWRIGHT_PROCESS_REQUEST_HEADERS set to None, neither the USER_AGENT
    setting nor the request cookies are sent to the website. Comment out the
    PLAYWRIGHT_PROCESS_REQUEST_HEADERS entry below to send them.
    """

    name = "headers"
    custom_settings = {
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
        "DOWNLOAD_HANDLERS": {
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            # "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        # None is the value under demonstration; see the class docstring.
        "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": None,
        "USER_AGENT": "Overridden user agent",
    }

    def start_requests(self):
        """Yield a single Playwright-enabled request that carries one cookie.

        The request also asks for a full-page screenshot, written as
        ``headers.png`` in the directory containing this file.
        """
        screenshot_file = Path(__file__).parent / "headers.png"
        take_screenshot = PageMethod("screenshot", path=screenshot_file, full_page=True)
        yield Request(
            url="https://httpbin.org/headers",
            meta={
                "playwright": True,
                "playwright_page_methods": [take_screenshot],
            },
            cookies={"foo": "bar"},
        )

    def parse(self, response):
        """Yield the response URL and the ``headers`` entry of the page's JSON.

        The JSON document is read from the text of the first ``<pre>`` element
        in the rendered page.
        """
        raw_json = response.css("pre::text").get()
        reported = json.loads(raw_json)["headers"]
        yield {"url": response.url, "headers": reported}

0 commit comments

Comments
 (0)