From 880c0a05f65a7618319c759d4f04193ffae4eb89 Mon Sep 17 00:00:00 2001 From: Andrii Kohut Date: Thu, 9 Oct 2014 18:00:06 +0300 Subject: [PATCH 1/2] new xpaths for livingsocial --- .../scraper_app/spiders/livingsocial_spider.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scrape/living_social/scraper_app/spiders/livingsocial_spider.py b/scrape/living_social/scraper_app/spiders/livingsocial_spider.py index 85c11c48..3091a94e 100644 --- a/scrape/living_social/scraper_app/spiders/livingsocial_spider.py +++ b/scrape/living_social/scraper_app/spiders/livingsocial_spider.py @@ -24,13 +24,13 @@ class LivingSocialSpider(BaseSpider): start_urls = ["http://www.livingsocial.com/cities/15-san-francisco"] deals_list_xpath = '//li[@dealid]' - item_fields = {'title': './/a/div[@class="deal-bottom"]/h3[@itemprop]/text()', + item_fields = {'title': './/a/div[@class="deal-details"]/h2[@itemprop]/text()', 'link': './/a/@href', - 'description': './/a/div[@class="deal-bottom"]/p/text()', - 'category': './/a/div[@class="deal-top"]/div[@class="deal-category"]/span/text()', - 'location': './/a/div[@class="deal-top"]/ul[@class="unstyled deal-info"]/li/text()', - 'original_price': './/a/div[@class="deal-bottom"]/ul[@class="unstyled deal-info"]/li[@class="deal-original"]/del/text()', - 'price': './/a/div[@class="deal-bottom"]/ul[@class="unstyled deal-info"]/li[@class="deal-price"]/text()'} + 'description': './/a/div[@class="deal-details"]/p[@class="description"]/text()', + 'category': './/a/div[@class="deal-image"]/p[@class="deal-category"]/text()', + 'location': './/a/div[@class="deal-details"]/p[@class="location"]/text()', + 'original_price': './/a/div[@class="deal-prices"]/div[@class="deal-strikethrough-price"]/text()', + 'price': './/a/div[@class="deal-prices"]/div[@class="deal-price"]/text()'} def parse(self, response): """ From ff1eab11ad8d8e0a3a2af4c2d33fa57dfa820df2 Mon Sep 17 00:00:00 2001 From: Andrii Kohut Date: Thu, 9 Oct 2014 18:02:17 +0300 Subject: [PATCH 2/2] a little bit more readable item_fields --- .../scraper_app/spiders/livingsocial_spider.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scrape/living_social/scraper_app/spiders/livingsocial_spider.py b/scrape/living_social/scraper_app/spiders/livingsocial_spider.py index 3091a94e..c0f77eeb 100644 --- a/scrape/living_social/scraper_app/spiders/livingsocial_spider.py +++ b/scrape/living_social/scraper_app/spiders/livingsocial_spider.py @@ -24,13 +24,15 @@ class LivingSocialSpider(BaseSpider): start_urls = ["http://www.livingsocial.com/cities/15-san-francisco"] deals_list_xpath = '//li[@dealid]' - item_fields = {'title': './/a/div[@class="deal-details"]/h2[@itemprop]/text()', - 'link': './/a/@href', - 'description': './/a/div[@class="deal-details"]/p[@class="description"]/text()', - 'category': './/a/div[@class="deal-image"]/p[@class="deal-category"]/text()', - 'location': './/a/div[@class="deal-details"]/p[@class="location"]/text()', - 'original_price': './/a/div[@class="deal-prices"]/div[@class="deal-strikethrough-price"]/text()', - 'price': './/a/div[@class="deal-prices"]/div[@class="deal-price"]/text()'} + item_fields = { + 'title': './/a/div[@class="deal-details"]/h2[@itemprop]/text()', + 'link': './/a/@href', + 'description': './/a/div[@class="deal-details"]/p[@class="description"]/text()', + 'category': './/a/div[@class="deal-image"]/p[@class="deal-category"]/text()', + 'location': './/a/div[@class="deal-details"]/p[@class="location"]/text()', + 'original_price': './/a/div[@class="deal-prices"]/div[@class="deal-strikethrough-price"]/text()', + 'price': './/a/div[@class="deal-prices"]/div[@class="deal-price"]/text()', + } def parse(self, response): """