python.su forum
Hello.
I have a parser script that reads news from a few dozen sites roughly every five minutes.
Here is the part of the code that shows two of the sites.
import requests
from bs4 import BeautifulSoup


def parse_gubernia(url):
    domen = "tv-gubernia.ru"
    headers = {
        'authority': domen,
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
        'sec-fetch-dest': 'document',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'accept-language': 'en-US,en;q=0.9',
    }
    session = requests.session()
    response = session.get(url, headers=headers)
    if response.status_code == 200:
        print("Success")
    else:
        print("Bad result")
    soup = BeautifulSoup(response.text, 'html.parser')
    posts = []
    for element in soup.find_all("div", class_="news-item news-item-double"):
        try:
            title = element.find("span", class_="news-item-title").text.strip()
            print(title)
            link = domen + element.find("a", class_="news-desc-long").get("href")
            print(link)
            posts.append([title, link])
        except Exception as error:
            print(error)
    return posts[0]


def parse_bloknot(url):
    domen = "bloknot-voronezh.ru"
    headers = {
        'authority': domen,
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
        'sec-fetch-dest': 'document',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'accept-language': 'en-US,en;q=0.9',
    }
    session = requests.session()
    response = session.get(url, headers=headers)
    if response.status_code == 200:
        print("Success")
    else:
        print("Bad result")
    soup = BeautifulSoup(response.text, 'html.parser')
    posts = []
    for element in soup.find_all('div', class_='tripleline'):
        try:
            title = element.find("a", class_="linksys").text.strip()
            print(title)
            link = domen + element.find("a", class_="linksys").get("href")
            print(link)
            posts.append([title, link])
        except Exception as error:
            print(error)
    return posts[0]
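The full script presumably calls a few dozen such functions on a timer; a minimal sketch of that polling loop is shown below (the PARSERS list, the listing-page URLs and the poll_once helper are illustrative assumptions, not code from the original script):

import time

# Hypothetical registry of parser functions and their listing-page URLs;
# the real script would hold a few dozen of these entries.
PARSERS = [
    (parse_gubernia, "https://tv-gubernia.ru/"),
    (parse_bloknot, "https://bloknot-voronezh.ru/"),
]

def poll_once():
    latest = []
    for parse, url in PARSERS:
        try:
            latest.append(parse(url))
        except Exception as error:
            # One failing site should not break the whole polling cycle.
            print(f"{url}: {error}")
    return latest

if __name__ == "__main__":
    while True:
        poll_once()
        time.sleep(300)  # roughly every five minutes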
Edited by i8080 (Feb 9, 2022 12:52:47)
You can raise an exception inside a function. For any situation you can raise an exception that matches it. You can also define your own exception classes.
Example
>>> class NoSiteConnection(ValueError):
...     pass
...
>>> def connect(url):
...     if 'some' in url:
...         raise NoSiteConnection("Can't connect to " + url)
...     else:
...         return 'Connected to ' + url
...
>>>
>>> urls = (
...     'https://www.site1.com',
...     'https://www.site2.com',
...     'https://www.site3.com',
...     'https://www.some-site.com',
...     'https://www.site4.com'
... )
>>>
>>> for i in urls:
...     try:
...         res = connect(i)
...         print(res)
...     except NoSiteConnection as e:
...         print('error: ' + str(e))
...
Connected to https://www.site1.com
Connected to https://www.site2.com
Connected to https://www.site3.com
error: Can't connect to https://www.some-site.com
Connected to https://www.site4.com
>>>
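Applied to the parsing code from the first post, the same pattern could look roughly like the sketch below (BadResponse, parse_site and the CSS selectors are illustrative names chosen for the example, not part of the original code):

import requests
from bs4 import BeautifulSoup

class BadResponse(ValueError):
    """Illustrative exception: the site did not return a usable news page."""
    pass

def parse_site(url, block_selector, title_selector):
    # Generic stand-in for parse_gubernia / parse_bloknot.
    response = requests.get(url)
    if response.status_code != 200:
        # Instead of printing "Bad result", report the failure to the caller.
        raise BadResponse(f"{url} returned HTTP {response.status_code}")
    soup = BeautifulSoup(response.text, "html.parser")
    posts = []
    for element in soup.select(block_selector):
        title = element.select_one(title_selector)
        if title is not None:
            posts.append(title.text.strip())
    if not posts:
        # The original's posts[0] would raise a bare IndexError here;
        # a named exception is easier to catch selectively.
        raise BadResponse(f"no news items found on {url}")
    return posts[0]

try:
    print(parse_site("https://tv-gubernia.ru/",
                     "div.news-item", "span.news-item-title"))
except BadResponse as error:
    print("error:", error)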