File tree Expand file tree Collapse file tree 1 file changed +31
-0
lines changed
example/scrashtest/spiders Expand file tree Collapse file tree 1 file changed +31
-0
lines changed Original file line number Diff line number Diff line change 1+ # -*- coding: utf-8 -*-
2+ from urlparse import urljoin
3+ import json
4+
5+ import scrapy
6+ from scrapy .contrib .linkextractors import LinkExtractor
7+
8+
class DmozSpider(scrapy.Spider):
    """Spider that reports whether the start page signals JavaScript support.

    ``parse`` inspects the ``<h1>`` text of the start page, logs the outcome,
    and then follows only the first link found on that page. The followed
    page is handled by ``parse_link``, which just logs what it received.
    """

    name = "js_spider"
    start_urls = ['http://www.isjavascriptenabled.com/']
    # Class-level options dict with the 'har' and 'html' args set to 1.
    # NOTE(review): nothing in this class reads it; presumably it is picked
    # up by the Splash integration elsewhere -- confirm.
    splash = {'args': {'har': 1, 'html': 1}}

    def parse(self, response):
        """Log the JS check result and yield a request for the first link.

        The page counts as "JS enabled" when the concatenated text of all
        ``<h1>`` nodes equals "yes", compared case-insensitively. Any other
        value is logged at error level.
        """
        heading = "".join(response.xpath("//h1/text()").extract())
        js_enabled = heading.lower() == "yes"
        if not js_enabled:
            self.log("Error! JS disabled!", scrapy.log.ERROR)
        else:
            self.log("JS enabled!")

        extractor = LinkExtractor()
        # Only the first extracted link is followed; the rest are ignored.
        first_link = next(iter(extractor.extract_links(response)), None)
        if first_link is not None:
            target = urljoin(response.url, first_link.url)
            yield scrapy.Request(target, self.parse_link)

    def parse_link(self, response):
        """Log the response object plus its ``<title>`` and ``<h1>`` markup."""
        title_markup = "".join(response.xpath("//title").extract())
        h1_markup = "".join(response.xpath("//h1").extract())
        self.log("response is: {}".format(repr(response)))
        self.log(u"Html in response contains {} {}".format(title_markup, h1_markup))
You can’t perform that action at this time.
0 commit comments