Skip to content

Commit 9e362e5

Browse files
pawelmhm and Gallaecio
authored and committed
Revert "[tests] refactors tests, adds tests for spider attr"
This reverts commit bed8998.
1 parent 42fa592 commit 9e362e5

File tree

1 file changed

+53
-142
lines changed

1 file changed

+53
-142
lines changed

tests/test_middleware.py

Lines changed: 53 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import base64
66

77
import scrapy
8+
from scrapy.core.engine import ExecutionEngine
89
from scrapy.utils.test import get_crawler
910
from scrapy.http import Response, TextResponse
1011
from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
@@ -698,56 +699,54 @@ def test_float_wait_arg():
698699
req = mw.process_request(req1, None)
699700
assert json.loads(to_native_str(req.body)) == {'url': req1.url, 'wait': 0.5}
700701

701-
def __init__(self, delay=0.0):
702-
self.delay = delay
703702

704703
def test_slot_policy_single_slot():
705704
mw = _get_mw()
706705
meta = {'splash': {
707706
'slot_policy': scrapy_splash.SlotPolicy.SINGLE_SLOT
708707
}}
709708

710-
class MockedDownloader(object):
709+
def test_slot_policy_single_slot():
710+
mw = _get_mw()
711+
meta = {'splash': {
712+
'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
713+
}}
711714

712-
def __init__(self):
713-
self.slots = {}
715+
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
716+
req1 = mw.process_request(req1, None)
714717

715-
def _get_slot_key(self, request, spider):
716-
if 'download_slot' in request.meta:
717-
return request.meta['download_slot']
718+
req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
719+
req2 = mw.process_request(req2, None)
718720

719-
key = urlparse_cached(request).hostname or ''
720-
return key
721+
assert req1.meta.get('download_slot')
722+
assert req1.meta['download_slot'] == req2.meta['download_slot']
721723

722724
def test_slot_policy_per_domain():
723725
mw = _get_mw()
724726
meta = {'splash': {
725727
'slot_policy': scrapy_splash.SlotPolicy.PER_DOMAIN
726728
}}
727729

728-
class MockedEngine(object):
729-
downloader = MockedDownloader()
730+
def test_slot_policy_per_domain():
731+
mw = _get_mw()
732+
meta = {'splash': {
733+
'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
734+
}}
730735

736+
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
737+
req1 = mw.process_request(req1, None)
731738

732-
class MiddlewareTest(unittest.TestCase):
739+
req2 = scrapy.Request("http://example.com/path2", meta=meta)
740+
req2 = mw.process_request(req2, None)
733741

734-
def setUp(self):
735-
self.crawler = get_crawler(settings_dict={
736-
'DOWNLOAD_HANDLERS': {'s3': None}, # for faster test running
737-
})
738-
if not hasattr(self.crawler, 'logformatter'):
739-
self.crawler.logformatter = None
740-
self.crawler.engine = MockedEngine()
741-
self.mw = SplashMiddleware.from_crawler(self.crawler)
742+
req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
743+
req3 = mw.process_request(req3, None)
742744

743-
def test_nosplash(self):
744-
req = scrapy.Request("http://example.com")
745-
old_meta = copy.deepcopy(req.meta)
746-
assert self.mw.process_request(req, None) is None
747-
assert old_meta == req.meta
745+
assert req1.meta.get('download_slot')
746+
assert req3.meta.get('download_slot')
748747

749-
def test_splash_request(self):
750-
req = SplashRequest("http://example.com?foo=bar&url=1&wait=100")
748+
assert req1.meta['download_slot'] == req2.meta['download_slot']
749+
assert req1.meta['download_slot'] != req3.meta['download_slot']
751750

752751
def test_slot_policy_scrapy_default():
753752
mw = _get_mw()
@@ -757,118 +756,30 @@ def test_slot_policy_scrapy_default():
757756
req = mw.process_request(req, None)
758757
assert 'download_slot' not in req.meta
759758

760-
expected_body = {'url': req.url}
761-
expected_body.update(SplashRequest.default_splash_meta['args'])
762-
assert json.loads(req2.body) == expected_body
763-
764-
def test_splash_request_no_url(self):
765-
lua_source = "function main(splash) return {result='ok'} end"
766-
req1 = SplashRequest(meta={'splash': {
767-
'args': {'lua_source': lua_source},
768-
'endpoint': 'execute',
769-
}})
770-
req = self.mw.process_request(req1, None)
771-
assert req.url == 'http://127.0.0.1:8050/execute'
772-
assert json.loads(req.body) == {
773-
'url': 'about:blank',
774-
'lua_source': lua_source
775-
}
759+
def test_slot_policy_scrapy_default():
760+
mw = _get_mw()
761+
req = scrapy.Request("http://example.com", meta = {'splash': {
762+
'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
763+
}})
764+
req = mw.process_request(req, None)
765+
assert 'download_slot' not in req.meta
776766

777-
def test_override_splash_url(self):
778-
req1 = scrapy.Request("http://example.com", meta={
779-
'splash': {
780-
'endpoint': 'render.png',
781-
'splash_url': 'http://splash.example.com'
782-
}
783-
})
784-
req = self.mw.process_request(req1, None)
785-
assert req.url == 'http://splash.example.com/render.png'
786-
assert json.loads(req.body) == {'url': req1.url}
787-
788-
def test_float_wait_arg(self):
789-
req1 = scrapy.Request("http://example.com", meta={
790-
'splash': {
791-
'endpoint': 'render.html',
792-
'args': {'wait': 0.5}
793-
}
794-
})
795-
req = self.mw.process_request(req1, None)
796-
assert json.loads(req.body) == {'url': req1.url, 'wait': 0.5}
797-
798-
def test_slot_policy_single_slot(self):
799-
meta = {'splash': {
800-
'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
801-
}}
802-
803-
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
804-
req1 = self.mw.process_request(req1, None)
805-
806-
req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
807-
req2 = self.mw.process_request(req2, None)
808-
809-
assert req1.meta.get('download_slot')
810-
assert req1.meta['download_slot'] == req2.meta['download_slot']
811-
812-
def test_slot_policy_per_domain(self):
813-
meta = {'splash': {
814-
'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
815-
}}
816-
817-
req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
818-
req1 = self.mw.process_request(req1, None)
819-
820-
req2 = scrapy.Request("http://example.com/path2", meta=meta)
821-
req2 = self.mw.process_request(req2, None)
822-
823-
req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
824-
req3 = self.mw.process_request(req3, None)
825-
826-
assert req1.meta.get('download_slot')
827-
assert req3.meta.get('download_slot')
828-
829-
assert req1.meta['download_slot'] == req2.meta['download_slot']
830-
assert req1.meta['download_slot'] != req3.meta['download_slot']
831-
832-
def test_slot_policy_scrapy_default(self):
833-
req = scrapy.Request("http://example.com", meta = {'splash': {
834-
'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
835-
}})
836-
req = self.mw.process_request(req, None)
837-
assert 'download_slot' not in req.meta
838-
839-
def test_adjust_timeout(self):
840-
req1 = scrapy.Request("http://example.com", meta = {
841-
'splash': {'args': {'timeout': 60, 'html': 1}},
842-
843-
# download_timeout is always present,
844-
# it is set by DownloadTimeoutMiddleware
845-
'download_timeout': 30,
846-
})
847-
req1 = self.mw.process_request(req1, None)
848-
assert req1.meta['download_timeout'] > 60
849-
850-
req2 = scrapy.Request("http://example.com", meta = {
851-
'splash': {'args': {'html': 1}},
852-
'download_timeout': 30,
853-
})
854-
req2 = self.mw.process_request(req2, None)
855-
assert req2.meta['download_timeout'] == 30
856-
857-
def test_spider_attribute(self):
858-
req_url = "http://scrapy.org"
859-
req1 = scrapy.Request(req_url)
860-
861-
spider = self.crawler._create_spider("foo")
862-
spider.splash = {"args": {"images": 0}}
863-
864-
req1 = self.mw.process_request(req1, spider)
865-
self.assertIn("_splash_processed", req1.meta)
866-
self.assertIn("render.json", req1.url)
867-
self.assertIn("url", json.loads(req1.body))
868-
self.assertEqual(json.loads(req1.body).get("url"), req_url)
869-
self.assertIn("images", json.loads(req1.body))
870-
871-
# spider attribute blank middleware disabled
872-
spider.splash = {}
873-
req2 = self.mw.process_request(req1, spider)
874-
self.assertIsNone(req2)
767+
768+
def test_adjust_timeout():
769+
mw = _get_mw()
770+
req1 = scrapy.Request("http://example.com", meta = {
771+
'splash': {'args': {'timeout': 60, 'html': 1}},
772+
773+
# download_timeout is always present,
774+
# it is set by DownloadTimeoutMiddleware
775+
'download_timeout': 30,
776+
})
777+
req1 = mw.process_request(req1, None)
778+
assert req1.meta['download_timeout'] > 60
779+
780+
req2 = scrapy.Request("http://example.com", meta = {
781+
'splash': {'args': {'html': 1}},
782+
'download_timeout': 30,
783+
})
784+
req2 = mw.process_request(req2, None)
785+
assert req2.meta['download_timeout'] == 30

0 commit comments

Comments
 (0)