Форум сайта python.su
Привет всем! Почему паук не переходит по страницам использую правило(что не так делаю), тут код:
# -*- encoding: utf-8 -*- from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor from scrapy.contrib.spiders import CrawlSpider, Rule from scrapy.selector import HtmlXPathSelector from job.items import JobItem import time class JobSpider(CrawlSpider): name = 'superjob' allowed_domains = ['superjob.ru'] start_urls = [ 'http://www.superjob.ru/vacancy/search/?t%5B0%5D=4&sbmit=1&period=7' ] rules = [ Rule(SgmlLinkExtractor(allow='/vacancy/search/?', restrict_xpaths=( u'//a[@class="h_border_none"]/<span>следующая</span>')), callback='parse', follow=True), ] def parse(self, response): hxs = HtmlXPathSelector(response) titles = hxs.select( '//*[@id="ng-app"]/div[2]/div/div[2]/div/div[1]/div[2]/div/div/h2/a') items = [] count = 0 for title in titles: item = JobItem() item['title'] = title.select('//h2/a/text()').extract() items.append(item) count += 1 name_time_file = time.strftime("%Y-%m-%d-%H-%M") data_file = open('{}.csv'.format(name_time_file), 'w') data_file.write(str(count)) data_file.close() # return items
Офлайн