import scrapy

from nettuts.items import NettutsItem


class MySpider(scrapy.Spider):
    """Spider that collects link titles from a list on the W3C home page.

    It requests ``https://www.w3.org/`` and yields one ``NettutsItem`` for
    every ``<li>`` found under the element whose id is
    ``w3c_home_upcoming_events``.
    """

    name = 'nettuts'
    allowed_domains = ["w3.org"]
    # Scrapy reads the plural ``start_urls`` attribute to build the initial
    # requests; a singular ``start_url`` is ignored and the spider would
    # never fetch anything.
    start_urls = ["https://www.w3.org/"]

    def parse(self, response):
        """Yield one item per list entry in the selected section.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            NettutsItem: ``item['title']`` holds the list of text nodes of
            the ``<a>`` elements that are direct children of the ``<li>``
            (an empty list when the entry has no such link).
        """
        for sel in response.xpath('//*[@id="w3c_home_upcoming_events"]/ul/li'):
            item = NettutsItem()
            # The XPath must be relative to the current <li>. A leading "/"
            # makes it absolute (evaluated from the document root), which
            # never matches an <a> here and leaves every title empty.
            item['title'] = sel.xpath('a/text()').extract()
            yield item