11import asyncio
22import logging
3+ import os
4+ import platform
35import random
46import re
7+ import signal
58import subprocess
69import time
710import uuid
811from contextlib import asynccontextmanager
912from pathlib import Path
13+ from threading import Thread
1014from typing import Tuple
1115from unittest import IsolatedAsyncioTestCase
1216
17+ import psutil
1318import pytest
19+ from playwright ._impl ._errors import TargetClosedError
1420from playwright .async_api import async_playwright
1521from scrapy import Request , Spider
1622
@@ -77,7 +83,7 @@ async def remote_chromium(with_devtools_protocol: bool = True):
7783 proc .communicate ()
7884
7985
80- class TestRemoteBrowser (IsolatedAsyncioTestCase ):
86+ class TestBrowserRemoteChromium (IsolatedAsyncioTestCase ):
8187 @pytest .fixture (autouse = True )
8288 def inject_fixtures (self , caplog ):
8389 caplog .set_level (logging .DEBUG )
@@ -87,6 +93,7 @@ def inject_fixtures(self, caplog):
8793 async def test_connect_devtools (self ):
8894 async with remote_chromium (with_devtools_protocol = True ) as devtools_url :
8995 settings_dict = {
96+ "PLAYWRIGHT_BROWSER_TYPE" : "chromium" ,
9097 "PLAYWRIGHT_CDP_URL" : devtools_url ,
9198 "PLAYWRIGHT_LAUNCH_OPTIONS" : {"headless" : True },
9299 }
@@ -105,6 +112,7 @@ async def test_connect_devtools(self):
105112 async def test_connect (self ):
106113 async with remote_chromium (with_devtools_protocol = False ) as browser_url :
107114 settings_dict = {
115+ "PLAYWRIGHT_BROWSER_TYPE" : "chromium" ,
108116 "PLAYWRIGHT_CONNECT_URL" : browser_url ,
109117 "PLAYWRIGHT_LAUNCH_OPTIONS" : {"headless" : True },
110118 }
@@ -130,16 +138,22 @@ async def test_connect(self):
130138 ) in self ._caplog .record_tuples
131139
132140
133- class TestBrowserReconnect (IsolatedAsyncioTestCase ):
141+ class TestBrowserReconnectChromium (IsolatedAsyncioTestCase ):
@pytest.fixture(autouse=True)
def inject_fixtures(self, caplog):
    """Store pytest's ``caplog`` fixture on the test case and capture DEBUG records.

    The fixture is ``autouse`` so that every test method of the class can
    inspect the captured log records through ``self._caplog``.
    """
    self._caplog = caplog
    caplog.set_level(logging.DEBUG)
138146
@staticmethod
def kill_chrome():
    """Send SIGKILL to every running process named "chrome" or "chromium".

    The match is a case-insensitive comparison against the process name
    reported by psutil. Processes that cannot be inspected or signalled are
    skipped so that a single failure does not stop the remaining ones from
    being killed.
    """
    for proc in psutil.process_iter(["pid", "name"]):
        # psutil stores None for attributes it could not read
        # (e.g. access denied or zombie process), so guard before lowercasing.
        name = proc.info["name"] or ""
        if name.lower() not in ("chrome", "chromium"):
            continue
        try:
            os.kill(proc.info["pid"], signal.SIGKILL)
        except (ProcessLookupError, PermissionError):
            # The process already exited (e.g. a child that died together with
            # its parent) or is not ours to signal; keep going with the rest.
            continue
152+
139153 @allow_windows
140- async def test_restart_browser (self ):
154+ async def test_browser_closed_restart (self ):
141155 spider = Spider ("foo" )
142- async with make_handler () as handler :
156+ async with make_handler (settings_dict = { "PLAYWRIGHT_BROWSER_TYPE" : "chromium" } ) as handler :
143157 with StaticMockServer () as server :
144158 req1 = Request (
145159 server .urljoin ("/index.html" ),
@@ -172,3 +186,80 @@ async def test_restart_browser(self):
172186 )
173187 == 2 # one at the beginning, one after calling Browser.close() manually
174188 )
189+
@pytest.mark.skipif(
    platform.system() == "Windows",
    reason="os.kill does not work as expected on Windows",
)
async def test_browser_crashed_restart(self):
    """Requests keep succeeding after the browser processes are killed mid-crawl.

    One request is downloaded first, then ``kill_chrome`` runs in a helper
    thread while three more requests are downloaded. Every response must be
    correct, and the captured log must show two browser launches and two
    browser disconnections.
    """
    spider = Spider("foo")
    settings = {"PLAYWRIGHT_BROWSER_TYPE": "chromium"}
    async with make_handler(settings_dict=settings) as handler:
        with StaticMockServer() as server:
            first_request = Request(
                server.urljoin("/index.html"),
                meta={"playwright": True, "playwright_include_page": True},
            )
            first_response = await handler._download_request(first_request, spider)

            # Kill the browser from a separate thread while the crawl goes on.
            killer = Thread(target=self.kill_chrome, daemon=True)
            killer.start()

            later_requests = [
                Request(server.urljoin(path), meta={"playwright": True})
                for path in ("/gallery.html", "/lorem_ipsum.html", "/scroll.html")
            ]
            # Downloads are awaited one after another, in order.
            later_responses = [
                await handler._download_request(request, spider)
                for request in later_requests
            ]
            killer.join()

    assert_correct_response(first_response, first_request)
    for response, request in zip(later_responses, later_requests):
        assert_correct_response(response, request)

    log_records = self._caplog.record_tuples
    disconnected = ("scrapy-playwright", logging.DEBUG, "Browser disconnected")
    launching = ("scrapy-playwright", logging.INFO, "Launching browser chromium")
    # one mid-crawl after killing the browser process, one at the end
    assert log_records.count(disconnected) == 2
    # one at the beginning, one after killing the browser process
    assert log_records.count(launching) == 2
236+
@pytest.mark.skipif(
    platform.system() == "Windows",
    reason="os.kill does not work as expected on Windows",
)
async def test_browser_crashed_do_not_restart(self):
    """With browser restarts disabled, requests fail once the browser is killed.

    ``PLAYWRIGHT_RESTART_DISCONNECTED_BROWSER`` is set to ``False``. After a
    first successful download, ``kill_chrome`` runs in a helper thread and
    the following downloads are expected to raise ``TargetClosedError``.
    """
    spider = Spider("foo")
    async with make_handler(
        settings_dict={
            "PLAYWRIGHT_BROWSER_TYPE": "chromium",
            "PLAYWRIGHT_RESTART_DISCONNECTED_BROWSER": False,
        }
    ) as handler:
        with StaticMockServer() as server:
            await asyncio.sleep(1)  # allow time for the browser to fully launch
            first_request = Request(
                server.urljoin("/index.html"),
                meta={"playwright": True, "playwright_include_page": True},
            )
            first_response = await handler._download_request(first_request, spider)
            assert_correct_response(first_response, first_request)

            killer = Thread(target=self.kill_chrome, daemon=True)
            killer.start()

            later_requests = [
                Request(server.urljoin(path), meta={"playwright": True})
                for path in ("/gallery.html", "/lorem_ipsum.html", "/scroll.html")
            ]
            # The block ends at the first request that raises; any requests
            # after it are never sent.
            with pytest.raises(TargetClosedError):
                for request in later_requests:
                    await handler._download_request(request, spider)
            killer.join()
0 commit comments