Skip to content

Commit 9e362e5

Browse files
pawelmhm and Gallaecio
authored and committed
Revert "[tests] refactors tests, adds tests for spider attr"
This reverts commit bed8998.
1 parent 42fa592 commit 9e362e5

File tree

1 file changed

+53
-142
lines changed

1 file changed

+53
-142
lines changed

tests/test_middleware.py

Lines changed: 53 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import base64
66

77
import scrapy
8+
from scrapy.core.engine import ExecutionEngine
89
from scrapy.utils.test import get_crawler
910
from scrapy.http import Response, TextResponse
1011
from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
@@ -698,56 +699,54 @@ def test_float_wait_arg():
698699
req = mw.process_request(req1, None)
699700
assert json.loads(to_native_str(req.body)) == {'url': req1.url, 'wait': 0.5}
700701

701-
def __init__(self, delay=0.0):
702-
self.delay = delay
703702

704703
def test_slot_policy_single_slot():
705704
mw = _get_mw()
706705
meta = {'splash': {
707706
'slot_policy': scrapy_splash.SlotPolicy.SINGLE_SLOT
708707
}}
709708

710-
class MockedDownloader(object):
709+
def test_slot_policy_single_slot():
710+
mw = _get_mw()
711+
meta = {'splash': {
712+
'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
713+
}}
711714

712-
def __init__(self):
713-
self.slots = {}
715+
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
716+
req1 = mw.process_request(req1, None)
714717

715-
def _get_slot_key(self, request, spider):
716-
if 'download_slot' in request.meta:
717-
return request.meta['download_slot']
718+
req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
719+
req2 = mw.process_request(req2, None)
718720

719-
key = urlparse_cached(request).hostname or ''
720-
return key
721+
assert req1.meta.get('download_slot')
722+
assert req1.meta['download_slot'] == req2.meta['download_slot']
721723

722724
def test_slot_policy_per_domain():
723725
mw = _get_mw()
724726
meta = {'splash': {
725727
'slot_policy': scrapy_splash.SlotPolicy.PER_DOMAIN
726728
}}
727729

728-
class MockedEngine(object):
729-
downloader = MockedDownloader()
730+
def test_slot_policy_per_domain():
731+
mw = _get_mw()
732+
meta = {'splash': {
733+
'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
734+
}}
730735

736+
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
737+
req1 = mw.process_request(req1, None)
731738

732-
class MiddlewareTest(unittest.TestCase):
739+
req2 = scrapy.Request("http://example.com/path2", meta=meta)
740+
req2 = mw.process_request(req2, None)
733741

734-
def setUp(self):
735-
self.crawler = get_crawler(settings_dict={
736-
'DOWNLOAD_HANDLERS': {'s3': None}, # for faster test running
737-
})
738-
if not hasattr(self.crawler, 'logformatter'):
739-
self.crawler.logformatter = None
740-
self.crawler.engine = MockedEngine()
741-
self.mw = SplashMiddleware.from_crawler(self.crawler)
742+
req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
743+
req3 = mw.process_request(req3, None)
742744

743-
def test_nosplash(self):
744-
req = scrapy.Request("http://example.com")
745-
old_meta = copy.deepcopy(req.meta)
746-
assert self.mw.process_request(req, None) is None
747-
assert old_meta == req.meta
745+
assert req1.meta.get('download_slot')
746+
assert req3.meta.get('download_slot')
748747

749-
def test_splash_request(self):
750-
req = SplashRequest("http://example.com?foo=bar&url=1&wait=100")
748+
assert req1.meta['download_slot'] == req2.meta['download_slot']
749+
assert req1.meta['download_slot'] != req3.meta['download_slot']
751750

752751
def test_slot_policy_scrapy_default():
753752
mw = _get_mw()
@@ -757,118 +756,30 @@ def test_slot_policy_scrapy_default():
757756
req = mw.process_request(req, None)
758757
assert 'download_slot' not in req.meta
759758

760-
expected_body = {'url': req.url}
761-
expected_body.update(SplashRequest.default_splash_meta['args'])
762-
assert json.loads(req2.body) == expected_body
763-
764-
def test_splash_request_no_url(self):
765-
lua_source = "function main(splash) return {result='ok'} end"
766-
req1 = SplashRequest(meta={'splash': {
767-
'args': {'lua_source': lua_source},
768-
'endpoint': 'execute',
769-
}})
770-
req = self.mw.process_request(req1, None)
771-
assert req.url == 'http://127.0.0.1:8050/execute'
772-
assert json.loads(req.body) == {
773-
'url': 'about:blank',
774-
'lua_source': lua_source
775-
}
759+
def test_slot_policy_scrapy_default():
760+
mw = _get_mw()
761+
req = scrapy.Request("http://example.com", meta = {'splash': {
762+
'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
763+
}})
764+
req = mw.process_request(req, None)
765+
assert 'download_slot' not in req.meta
776766

777-
def test_override_splash_url(self):
778-
req1 = scrapy.Request("http://example.com", meta={
779-
'splash': {
780-
'endpoint': 'render.png',
781-
'splash_url': 'http://splash.example.com'
782-
}
783-
})
784-
req = self.mw.process_request(req1, None)
785-
assert req.url == 'http://splash.example.com/render.png'
786-
assert json.loads(req.body) == {'url': req1.url}
787-
788-
def test_float_wait_arg(self):
789-
req1 = scrapy.Request("http://example.com", meta={
790-
'splash': {
791-
'endpoint': 'render.html',
792-
'args': {'wait': 0.5}
793-
}
794-
})
795-
req = self.mw.process_request(req1, None)
796-
assert json.loads(req.body) == {'url': req1.url, 'wait': 0.5}
797-
798-
def test_slot_policy_single_slot(self):
799-
meta = {'splash': {
800-
'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
801-
}}
802-
803-
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
804-
req1 = self.mw.process_request(req1, None)
805-
806-
req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
807-
req2 = self.mw.process_request(req2, None)
808-
809-
assert req1.meta.get('download_slot')
810-
assert req1.meta['download_slot'] == req2.meta['download_slot']
811-
812-
def test_slot_policy_per_domain(self):
813-
meta = {'splash': {
814-
'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
815-
}}
816-
817-
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
818-
req1 = self.mw.process_request(req1, None)
819-
820-
req2 = scrapy.Request("http://example.com/path2", meta=meta)
821-
req2 = self.mw.process_request(req2, None)
822-
823-
req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
824-
req3 = self.mw.process_request(req3, None)
825-
826-
assert req1.meta.get('download_slot')
827-
assert req3.meta.get('download_slot')
828-
829-
assert req1.meta['download_slot'] == req2.meta['download_slot']
830-
assert req1.meta['download_slot'] != req3.meta['download_slot']
831-
832-
def test_slot_policy_scrapy_default(self):
833-
req = scrapy.Request("http://example.com", meta = {'splash': {
834-
'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
835-
}})
836-
req = self.mw.process_request(req, None)
837-
assert 'download_slot' not in req.meta
838-
839-
def test_adjust_timeout(self):
840-
req1 = scrapy.Request("http://example.com", meta = {
841-
'splash': {'args': {'timeout': 60, 'html': 1}},
842-
843-
# download_timeout is always present,
844-
# it is set by DownloadTimeoutMiddleware
845-
'download_timeout': 30,
846-
})
847-
req1 = self.mw.process_request(req1, None)
848-
assert req1.meta['download_timeout'] > 60
849-
850-
req2 = scrapy.Request("http://example.com", meta = {
851-
'splash': {'args': {'html': 1}},
852-
'download_timeout': 30,
853-
})
854-
req2 = self.mw.process_request(req2, None)
855-
assert req2.meta['download_timeout'] == 30
856-
857-
def test_spider_attribute(self):
858-
req_url = "http://scrapy.org"
859-
req1 = scrapy.Request(req_url)
860-
861-
spider = self.crawler._create_spider("foo")
862-
spider.splash = {"args": {"images": 0}}
863-
864-
req1 = self.mw.process_request(req1, spider)
865-
self.assertIn("_splash_processed", req1.meta)
866-
self.assertIn("render.json", req1.url)
867-
self.assertIn("url", json.loads(req1.body))
868-
self.assertEqual(json.loads(req1.body).get("url"), req_url)
869-
self.assertIn("images", json.loads(req1.body))
870-
871-
# spider attribute blank middleware disabled
872-
spider.splash = {}
873-
req2 = self.mw.process_request(req1, spider)
874-
self.assertIsNone(req2)
767+
768+
def test_adjust_timeout():
769+
mw = _get_mw()
770+
req1 = scrapy.Request("http://example.com", meta = {
771+
'splash': {'args': {'timeout': 60, 'html': 1}},
772+
773+
# download_timeout is always present,
774+
# it is set by DownloadTimeoutMiddleware
775+
'download_timeout': 30,
776+
})
777+
req1 = mw.process_request(req1, None)
778+
assert req1.meta['download_timeout'] > 60
779+
780+
req2 = scrapy.Request("http://example.com", meta = {
781+
'splash': {'args': {'html': 1}},
782+
'download_timeout': 30,
783+
})
784+
req2 = mw.process_request(req2, None)
785+
assert req2.meta['download_timeout'] == 30

0 commit comments

Comments
 (0)