from bs4 import BeautifulSoup
import requests
# Catalogue page that the script below downloads and parses.
url = 'https://parsinger.ru/html/index3_page_1.html'
def get_html(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot block the script indefinitely.

    Returns:
        The page markup as ``str``, or ``None`` when the server answers
        with any status other than HTTP 200.

    Exceptions raised by ``requests`` (connection errors, timeouts) are not
    caught here and propagate to the caller.
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        return None
    # Decode the raw bytes explicitly as UTF-8 instead of using resp.text,
    # whose encoding is chosen by requests from the response headers.
    return resp.content.decode('utf-8')
def get_product_info(block):
    """Extract image URL, name, description and price from one product card.

    Args:
        block: Element exposing a BeautifulSoup-style ``find`` method
            (one ``div.item`` of the catalogue page).

    Returns:
        A dict with the keys ``img``, ``name``, ``description`` and
        ``price``. A value stays ``None`` when the matching element is
        missing or cannot be parsed:

        * ``img`` -- ``src`` attribute of the ``<img>`` inside ``.img_box``.
        * ``name`` -- stripped text of ``.name_item`` inside ``.img_box``.
        * ``description`` -- dict built from the ``key: value`` lines of
          ``.description``; keys and values are lower-cased and stripped.
        * ``price`` -- ``(amount, currency)`` tuple such as ``(1610, 'руб')``.
    """
    out = dict.fromkeys(['img', 'name', 'description', 'price'])

    img_box = block.find(class_='img_box')
    if img_box is not None:
        img = img_box.find('img')
        if img is not None:
            # .get() rather than ['src']: an <img> without a src attribute
            # must leave the field as None instead of raising KeyError.
            out['img'] = img.get('src')
        name_item = img_box.find(class_='name_item')
        if name_item is not None:
            out['name'] = name_item.text.strip()

    description = block.find(class_='description')
    if description is not None:
        # Skip blank lines and stray text without a colon; they would
        # otherwise break the two-element unpacking below.
        pairs = (
            line.split(':', 1)
            for line in description.text.strip().splitlines()
            if ':' in line
        )
        out['description'] = {
            key.lower().strip(): value.lower().strip() for key, value in pairs
        }

    price = block.find(class_='price')
    if price is not None:
        tokens = price.text.split()
        # The last token is the currency; everything before it is the
        # amount, which may be space-grouped ("1 610 руб").
        if len(tokens) >= 2:
            try:
                out['price'] = int(''.join(tokens[:-1])), tokens[-1]
            except ValueError:
                # Non-numeric amount: keep None, like any other missing field.
                pass
    return out
def get_data_from_html(html):
    """Parse a catalogue page and return one product dict per ``div.item``.

    Args:
        html: Page markup. ``None`` or an empty string (``get_html``
            returns ``None`` for a non-200 response) is accepted.

    Returns:
        A list with the ``get_product_info`` result for every
        ``<div class="item">`` found, in document order; an empty list
        when there is no markup to parse.
    """
    if not html:
        # Nothing was downloaded: report "no products" instead of letting
        # the parser fail on None.
        return []
    soup = BeautifulSoup(html, 'lxml')
    return [get_product_info(item) for item in soup.find_all('div', class_='item')]
# Download the catalogue page and parse every product card on it.
html = get_html(url)
if html is None:
    # get_html() returns None for a non-200 response; stop with a clear
    # message instead of failing later inside the parser.
    raise SystemExit('Failed to download ' + url)
result = get_data_from_html(html)

# First parsed product, as a quick sanity check (skipped when the page
# contained no products, where result[0] would raise IndexError).
if result:
    print(result[0])

# Product names grouped by the 'тип' field of the description. A product
# whose description is missing, or has no 'тип' entry, simply never matches.
for wanted_type in ('мышь проводная', 'мышь беспроводная'):
    print("============")
    print([i['name'] for i in result if (i['description'] or {}).get('тип') == wanted_type])

# Names of products cheaper than 600; products without a parsed price
# are skipped.
print("============")
print([i['name'] for i in result if i['price'] and i['price'][0] < 600])
# Sample output of the script:
# {'img': 'https://parsinger.ru/img/3/1.jpg', 'name': 'Vampire RGB,9 кнопок', 'description': {'бренд': 'defender', 'тип': 'мышь проводная', 'подключение к компьютеру': 'usb', 'игровая': 'да'}, 'price': (1610, 'руб')}
# ============
# ['Vampire RGB,9 кнопок', 'Defender Halo Z GM-430L', 'Defender sTarx GM-390L', 'Defender Skull GM-180L', 'Defender Killer GM-170L', 'Defender Ghost GM-190L', 'Defender Witcher GM-990']
# ============
# ['Defender Shark 2']
# ============
# ['Defender Halo Z GM-430L', 'Defender Skull GM-180L', 'Defender Killer GM-170L']
# Process finished with exit code 0