Привет всем! Почему паук не переходит по страницам использую правило(что не так делаю), тут код:
# -*- encoding: utf-8 -*-
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import HtmlXPathSelector
from job.items import JobItem
import time
class JobSpider(CrawlSpider):
name = 'superjob'
allowed_domains = ['superjob.ru']
start_urls = [
'http://www.superjob.ru/vacancy/search/?t%5B0%5D=4&sbmit=1&period=7'
]
rules = [
Rule(SgmlLinkExtractor(allow='/vacancy/search/?',
restrict_xpaths=(
u'//a[@class="h_border_none"]/<span>следующая</span>')),
callback='parse',
follow=True),
]
def parse(self, response):
hxs = HtmlXPathSelector(response)
titles = hxs.select(
'//*[@id="ng-app"]/div[2]/div/div[2]/div/div[1]/div[2]/div/div/h2/a')
items = []
count = 0
for title in titles:
item = JobItem()
item['title'] = title.select('//h2/a/text()').extract()
items.append(item)
count += 1
name_time_file = time.strftime("%Y-%m-%d-%H-%M")
data_file = open('{}.csv'.format(name_time_file), 'w')
data_file.write(str(count))
data_file.close()
# return items