How do I fix this code? The user agent and the proxy don't work.
import csv
import random
import fake_useragent
import requests
from bs4 import BeautifulSoup as bs

def get_html(url):
    with open('proxy.txt', 'r') as f:
        proxies_list = [line.strip() for line in f if line.strip()]
    while True:
        try:
            # Keep the full list intact so a failed attempt can pick
            # a different proxy on the next iteration (the original
            # overwrote the list with a single string).
            proxy = random.choice(proxies_list)
            user_agent = fake_useragent.UserAgent()
            # .random is a string; wrapping it in int() raised ValueError
            # on every request, which is why the user agent "didn't work".
            headers = {'User-Agent': user_agent.random}
            # requests expects a scheme-to-URL mapping here, not a bare
            # string (assuming proxy.txt entries include the scheme,
            # e.g. http://ip:port).
            r = requests.get(url, headers=headers,
                             proxies={'http': proxy, 'https': proxy},
                             timeout=2)
            if r.status_code == 200:
                # Return the HTML; the original only broke out of the
                # loop and implicitly returned None.
                return r.text
        except Exception as e:
            print(type(e), e)

def get_page_links(html):
    soup = bs(html, 'lxml')
    ads = soup.find('div', class_='col-md-12 search-result').find_all('div', class_='search-result_item')
    all_links = []
    for ad in ads:
        link = 'https://tourism.gov.ru' + ad.find('a', class_='search-result_item_link').get('href')
        all_links.append(link)
    return all_links

def get_page_data(html):
    soup = bs(html, 'lxml')
    try:
        title = soup.find('div', class_='col-sm-9 col-xs-12 content').find('h1').text
    except Exception:
        title = ''
    data = {'Название': title}
    return data

def write_csv(data):
    with open('any.csv', 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # writerow() takes an iterable of fields; passing the bare string
        # wrote one character per column.
        writer.writerow([data['Название']])

def main():
    urls_list = []
    for i in range(1, 3):
        url = f'https://tourism.gov.ru/operators/?PAGEN_1={i}'
        links = get_page_links(get_html(url))
        urls_list.append(links)
    for urls in urls_list:
        for url in urls:
            data = get_page_data(get_html(url))
            write_csv(data)
            print(data)

if __name__ == '__main__':
    main()
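
For reference, the two symptoms in the question trace back to two lines: int(user) fails because a User-Agent is a string, so int() raises ValueError on every attempt, and proxies=proxy fails because requests expects a scheme-to-URL mapping, not a bare string. Below is a minimal sketch isolating just those two fixes; the fetch function name is mine, and it assumes proxy.txt holds one scheme-prefixed proxy per line, such as http://1.2.3.4:8080.

import random

import fake_useragent
import requests


def fetch(url):
    # Assumption: proxy.txt stores one proxy per line with the scheme
    # included, e.g. http://1.2.3.4:8080.
    with open('proxy.txt') as f:
        pool = [line.strip() for line in f if line.strip()]
    proxy = random.choice(pool)
    ua = fake_useragent.UserAgent()
    r = requests.get(
        url,
        headers={'User-Agent': ua.random},        # a string, no int()
        proxies={'http': proxy, 'https': proxy},  # mapping, not a string
        timeout=5,
    )
    r.raise_for_status()
    return r.text

Even with these fixes, free proxy lists go stale quickly, so many attempts will still time out; that is a property of the proxies rather than of the code, and the retry loop in get_html exists to handle it.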