@@ -5,6 +5,7 @@
 import base64
 
 import scrapy
+from scrapy.core.engine import ExecutionEngine
 from scrapy.utils.test import get_crawler
 from scrapy.http import Response, TextResponse
 from scrapy.downloadermiddlewares.httpcache import HttpCacheMiddleware
@@ -698,56 +699,54 @@ def test_float_wait_arg():
     req = mw.process_request(req1, None)
     assert json.loads(to_native_str(req.body)) == {'url': req1.url, 'wait': 0.5}
 
-    def __init__(self, delay=0.0):
-        self.delay = delay
 
 def test_slot_policy_single_slot():
     mw = _get_mw()
     meta = {'splash': {
         'slot_policy': scrapy_splash.SlotPolicy.SINGLE_SLOT
     }}
 
-class MockedDownloader(object):
+def test_slot_policy_single_slot():
+    mw = _get_mw()
+    meta = {'splash': {
+        'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
+    }}
 
-    def __init__(self):
-        self.slots = {}
+    req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
+    req1 = mw.process_request(req1, None)
 
-    def _get_slot_key(self, request, spider):
-        if 'download_slot' in request.meta:
-            return request.meta['download_slot']
+    req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
+    req2 = mw.process_request(req2, None)
 
-        key = urlparse_cached(request).hostname or ''
-        return key
+    assert req1.meta.get('download_slot')
+    assert req1.meta['download_slot'] == req2.meta['download_slot']
 
 def test_slot_policy_per_domain():
     mw = _get_mw()
     meta = {'splash': {
         'slot_policy': scrapy_splash.SlotPolicy.PER_DOMAIN
     }}
 
-class MockedEngine(object):
-    downloader = MockedDownloader()
+def test_slot_policy_per_domain():
+    mw = _get_mw()
+    meta = {'splash': {
+        'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
+    }}
 
+    req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
+    req1 = mw.process_request(req1, None)
 
-class MiddlewareTest(unittest.TestCase):
+    req2 = scrapy.Request("http://example.com/path2", meta=meta)
+    req2 = mw.process_request(req2, None)
 
-    def setUp(self):
-        self.crawler = get_crawler(settings_dict={
-            'DOWNLOAD_HANDLERS': {'s3': None},  # for faster test running
-        })
-        if not hasattr(self.crawler, 'logformatter'):
-            self.crawler.logformatter = None
-        self.crawler.engine = MockedEngine()
-        self.mw = SplashMiddleware.from_crawler(self.crawler)
+    req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
+    req3 = mw.process_request(req3, None)
 
-    def test_nosplash(self):
-        req = scrapy.Request("http://example.com")
-        old_meta = copy.deepcopy(req.meta)
-        assert self.mw.process_request(req, None) is None
-        assert old_meta == req.meta
+    assert req1.meta.get('download_slot')
+    assert req3.meta.get('download_slot')
 
-    def test_splash_request(self):
-        req = SplashRequest("http://example.com?foo=bar&url=1&wait=100")
+    assert req1.meta['download_slot'] == req2.meta['download_slot']
+    assert req1.meta['download_slot'] != req3.meta['download_slot']
 
 def test_slot_policy_scrapy_default():
     mw = _get_mw()
@@ -757,118 +756,30 @@ def test_slot_policy_scrapy_default():
     req = mw.process_request(req, None)
     assert 'download_slot' not in req.meta
 
-        expected_body = {'url': req.url}
-        expected_body.update(SplashRequest.default_splash_meta['args'])
-        assert json.loads(req2.body) == expected_body
-
-    def test_splash_request_no_url(self):
-        lua_source = "function main(splash) return {result='ok'} end"
-        req1 = SplashRequest(meta={'splash': {
-            'args': {'lua_source': lua_source},
-            'endpoint': 'execute',
-        }})
-        req = self.mw.process_request(req1, None)
-        assert req.url == 'http://127.0.0.1:8050/execute'
-        assert json.loads(req.body) == {
-            'url': 'about:blank',
-            'lua_source': lua_source
-        }
+def test_slot_policy_scrapy_default():
+    mw = _get_mw()
+    req = scrapy.Request("http://example.com", meta={'splash': {
+        'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
+    }})
+    req = mw.process_request(req, None)
+    assert 'download_slot' not in req.meta
 
-    def test_override_splash_url(self):
-        req1 = scrapy.Request("http://example.com", meta={
-            'splash': {
-                'endpoint': 'render.png',
-                'splash_url': 'http://splash.example.com'
-            }
-        })
-        req = self.mw.process_request(req1, None)
-        assert req.url == 'http://splash.example.com/render.png'
-        assert json.loads(req.body) == {'url': req1.url}
-
-    def test_float_wait_arg(self):
-        req1 = scrapy.Request("http://example.com", meta={
-            'splash': {
-                'endpoint': 'render.html',
-                'args': {'wait': 0.5}
-            }
-        })
-        req = self.mw.process_request(req1, None)
-        assert json.loads(req.body) == {'url': req1.url, 'wait': 0.5}
-
-    def test_slot_policy_single_slot(self):
-        meta = {'splash': {
-            'slot_policy': scrapyjs.SlotPolicy.SINGLE_SLOT
-        }}
-
-        req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
-        req1 = self.mw.process_request(req1, None)
-
-        req2 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
-        req2 = self.mw.process_request(req2, None)
-
-        assert req1.meta.get('download_slot')
-        assert req1.meta['download_slot'] == req2.meta['download_slot']
-
-    def test_slot_policy_per_domain(self):
-        meta = {'splash': {
-            'slot_policy': scrapyjs.SlotPolicy.PER_DOMAIN
-        }}
-
-        req1 = scrapy.Request("http://example.com/path?key=value", meta=meta)
-        req1 = self.mw.process_request(req1, None)
-
-        req2 = scrapy.Request("http://example.com/path2", meta=meta)
-        req2 = self.mw.process_request(req2, None)
-
-        req3 = scrapy.Request("http://fooexample.com/path?key=value", meta=meta)
-        req3 = self.mw.process_request(req3, None)
-
-        assert req1.meta.get('download_slot')
-        assert req3.meta.get('download_slot')
-
-        assert req1.meta['download_slot'] == req2.meta['download_slot']
-        assert req1.meta['download_slot'] != req3.meta['download_slot']
-
-    def test_slot_policy_scrapy_default(self):
-        req = scrapy.Request("http://example.com", meta={'splash': {
-            'slot_policy': scrapyjs.SlotPolicy.SCRAPY_DEFAULT
-        }})
-        req = self.mw.process_request(req, None)
-        assert 'download_slot' not in req.meta
-
-    def test_adjust_timeout(self):
-        req1 = scrapy.Request("http://example.com", meta={
-            'splash': {'args': {'timeout': 60, 'html': 1}},
-
-            # download_timeout is always present,
-            # it is set by DownloadTimeoutMiddleware
-            'download_timeout': 30,
-        })
-        req1 = self.mw.process_request(req1, None)
-        assert req1.meta['download_timeout'] > 60
-
-        req2 = scrapy.Request("http://example.com", meta={
-            'splash': {'args': {'html': 1}},
-            'download_timeout': 30,
-        })
-        req2 = self.mw.process_request(req2, None)
-        assert req2.meta['download_timeout'] == 30
-
-    def test_spider_attribute(self):
-        req_url = "http://scrapy.org"
-        req1 = scrapy.Request(req_url)
-
-        spider = self.crawler._create_spider("foo")
-        spider.splash = {"args": {"images": 0}}
-
-        req1 = self.mw.process_request(req1, spider)
-        self.assertIn("_splash_processed", req1.meta)
-        self.assertIn("render.json", req1.url)
-        self.assertIn("url", json.loads(req1.body))
-        self.assertEqual(json.loads(req1.body).get("url"), req_url)
-        self.assertIn("images", json.loads(req1.body))
-
-        # spider attribute blank middleware disabled
-        spider.splash = {}
-        req2 = self.mw.process_request(req1, spider)
-        self.assertIsNone(req2)
+
+def test_adjust_timeout():
+    mw = _get_mw()
+    req1 = scrapy.Request("http://example.com", meta={
+        'splash': {'args': {'timeout': 60, 'html': 1}},
+
+        # download_timeout is always present,
+        # it is set by DownloadTimeoutMiddleware
+        'download_timeout': 30,
+    })
+    req1 = mw.process_request(req1, None)
+    assert req1.meta['download_timeout'] > 60
+
+    req2 = scrapy.Request("http://example.com", meta={
+        'splash': {'args': {'html': 1}},
+        'download_timeout': 30,
+    })
+    req2 = mw.process_request(req2, None)
+    assert req2.meta['download_timeout'] == 30
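
Note: the function-based tests added in this diff call a module-level _get_mw() helper whose definition is outside the shown hunks. A minimal sketch of such a helper, reconstructed from the removed setUp() and mocked engine/downloader code above, is given below; the import path scrapy_splash (the file also appears under the older scrapyjs name), the MockedDownloader/MockedEngine class names and the exact wiring are assumptions, not the file's actual definition.

# Hypothetical reconstruction of the _get_mw() test helper, based on the
# removed class-based setUp() shown in the diff; names are assumptions.
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.test import get_crawler

from scrapy_splash import SplashMiddleware  # assumed import path


class MockedDownloader(object):
    """Stub downloader exposing only the slot-key lookup the middleware needs."""
    def __init__(self):
        self.slots = {}

    def _get_slot_key(self, request, spider):
        # Reuse an explicit download_slot if one is already set,
        # otherwise fall back to the request's hostname.
        if 'download_slot' in request.meta:
            return request.meta['download_slot']
        return urlparse_cached(request).hostname or ''


class MockedEngine(object):
    """Stub engine so the middleware can reach a downloader without a real crawl."""
    downloader = MockedDownloader()


def _get_mw():
    """Build a SplashMiddleware wired to a crawler whose engine is mocked."""
    crawler = get_crawler(settings_dict={
        'DOWNLOAD_HANDLERS': {'s3': None},  # for faster test running
    })
    if not hasattr(crawler, 'logformatter'):
        crawler.logformatter = None
    crawler.engine = MockedEngine()
    return SplashMiddleware.from_crawler(crawler)

The mocked engine only has to provide downloader._get_slot_key(), presumably because that is what the slot-policy handling in SplashMiddleware.process_request() consults when it assigns request.meta['download_slot'] in the tests above.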