Skip to content

Commit 385d567

Browse files
pawelmhm authored and Gallaecio committed
[scrashtest] add another test spider
1 parent 391cda2 commit 385d567

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# -*- coding: utf-8 -*-
2+
from urlparse import urljoin
3+
import json
4+
5+
import scrapy
6+
from scrapy.contrib.linkextractors import LinkExtractor
7+
8+
9+
class DmozSpider(scrapy.Spider):
    """Test spider that reports whether the start page says JS is enabled.

    ``parse`` inspects the ``<h1>`` text of the start page and logs the
    outcome, then requests the first link found on that page; ``parse_link``
    logs the ``<title>`` and ``<h1>`` markup of the followed page.
    """

    name = "js_spider"
    start_urls = ['http://www.isjavascriptenabled.com/']
    # NOTE(review): presumably consumed by a Splash integration outside this
    # file to request HAR and HTML output -- confirm against the project setup.
    splash = {'args': {'har': 1, 'html': 1}}

    def parse(self, response):
        """Log the JS check result and follow at most one link from the page.

        Yields a single ``scrapy.Request`` for the first extracted link (with
        ``parse_link`` as its callback), or nothing when no link is found.
        """
        heading_text = "".join(response.xpath("//h1/text()").extract())
        if heading_text.lower() != "yes":
            self.log("Error! JS disabled!", scrapy.log.ERROR)
        else:
            self.log("JS enabled!")

        # Only the first link is followed; the rest are deliberately ignored.
        extracted = LinkExtractor().extract_links(response)
        first_link = next(iter(extracted), None)
        if first_link is not None:
            target = urljoin(response.url, first_link.url)
            yield scrapy.Request(target, self.parse_link)

    def parse_link(self, response):
        """Log the response repr plus the page's ``<title>`` and ``<h1>`` markup."""
        title_markup = "".join(response.xpath("//title").extract())
        heading_markup = "".join(response.xpath("//h1").extract())
        self.log("response is: {}".format(repr(response)))
        summary = u"Html in response contains {} {}".format(
            title_markup, heading_markup)
        self.log(summary)

0 commit comments

Comments
 (0)