Форум сайта python.su
Ребят, помогите пож разобраться что не так в парсере?
Скрипт проходит страницы, но почему то не сохраняет почту в файлик. Что не так?
# -*- coding: utf-8 -*- import requests from lxml import html import pandas as pd from math import ceil import openpyxl all_pages = ['https://www.ua-region.info/kved/Ind.15'] page = requests.get(all_pages[0]) tree = html.fromstring(page.content) items='2139' #items = tree.xpath('//div[@class ="b-items-total"]')[0].text.split()[-1] last_page = int(ceil(float(items) / 10)) count = 1 for i in range(1, last_page): count += 1 all_pages.append('{0}&start_page={1}'.format(all_pages[0], count)) print('{0}&start_page={1}'.format(all_pages[0], count)) print('{} pages found'.format(len(all_pages))) all_links = [] def get_links(url): parsed_links = [] page = requests.get(url) tree = html.fromstring(page.content) links = tree.xpath('//h2[@itemprop="name"]/a') for lnk in links: parsed_links.append('https://www.ua-region.info{}'.format(lnk.get('href'))) print('https://www.ua-region.info{}'.format(lnk.get('href'))) return parsed_links all_links = [] for url in all_pages: all_links += get_links(url) mails = [] def get_mail(): try: mail = [i.text.encode('utf-8') for i in tree.xpath('//td[@itemprop="email"]/a')] mails.append(', '.join(mail)) except Exception as e: print(e) print('mail not found') mails.append('') cntr = 0 for url in all_pages: try: root = requests.get(url) tree = html.fromstring(root.content) get_mail() cntr += 1 print('{} pages have been parsed'.format(cntr)) except Exception as e: print(e) df = pd.DataFrame({ "e-mail": mails, }, columns=["e-mail"]) df.drop_duplicates(subset=["e-mail"], inplace=True) writer = pd.ExcelWriter('Agro.xlsx', engine='openpyxl') df.to_excel(writer, index=False) writer.save()
Офлайн