Форум сайта python.su
ребят, помогите пожалуйста извлечь данные
есть страница интернет-магазина. нужно вытащить описание товара и поместить его в словарь следующего вида:
data['Product Composition'] = '93% Polyamide 7% Elastane Lining: 100% Polyester</p><p>Dress Length: 90 cm' data['Product Attributes;'] = ': Boat Neck, Long Sleeve, Midi, Zip, Concealed, Laced, Side Lining Type: Full Lining'
import lxml.html import requests import pprint def get_doc(url): try: req = requests.get(url) except requests.exceptions.ConnectionError as exc: print('A Connection error occurred. ', exc) else: doc_html = req.text doc_obj = lxml.html.document_fromstring(doc_html) return doc_html, doc_obj req = get_doc('http://en.modagram.com/women/open-back-velvet-dress-claret/detail/37803/168598') doc_html, doc_obj = req details = {} base_details_query = doc_obj.xpath('//section[@id="ProductInfo"]/div[contains(concat(" ", normalize-space(@class), " "), " TabItem ")]') if len(base_details_query): base_details_query = base_details_query[0].xpath('//p/strong') if len(base_details_query): for dict_title in base_details_query: p = dict_title.getparent() print(dict_title, dict_title.text, p, p.text, '-----') desc = [] while p is not None and not p.xpath('.//strong'): desc.append(p.text) p = p.getnext() details[dict_title.text] = " ".join(desc) pprint.pprint(details)
Отредактировано zlodiak (Март 10, 2014 21:45:26)
Офлайн