Вот код парсера. По неопытности причин, кроме кодировки и разных версий питона, не вижу, но что-то мне подсказывает, что дело не в этом...
import lxml.html
import re
#----------------------------------------------------------------------
def parse(p_url):
    """Scrape one catalogue page and return its products as CSV lines.

    The page is loaded with ``lxml.html.parse`` and five parallel lists are
    extracted with XPath: article codes, titles, stock counts, prices and
    description elements. The lists are combined positionally, producing one
    output line per product.

    Args:
        p_url: Location of the page, passed unchanged to ``lxml.html.parse``.

    Returns:
        A list of newline-terminated strings of the form
        ``code;title;price;count;description``.

    Raises:
        IndexError: If an article, price or count string contains no
            matching number.
    """
    import warnings  # local import: only used for the mismatch notice below

    x_path = "//*[starts-with(@id, 'pr2cart_')]/td/div[2]"
    tree = lxml.html.parse(p_url)
    articles = tree.xpath(x_path + "/p[1]/text()")
    titles = tree.xpath(x_path + "/span/a/text()")
    counts = tree.xpath(x_path + "/p[2]/span/@title")
    prices = tree.xpath(x_path + "/*//td[1]/b/text()")
    descrs = tree.xpath("//*[starts-with(@id, 's_desc_')]")

    # zip() stops at the shortest list, so a product that lacks any one field
    # silently shifts or drops rows. Make that visible instead of hiding it.
    sizes = {
        'articles': len(articles),
        'titles': len(titles),
        'prices': len(prices),
        'counts': len(counts),
        'descrs': len(descrs),
    }
    if len(set(sizes.values())) > 1:
        warnings.warn(
            "parse({0!r}): extracted columns differ in length {1}; "
            "rows may be misaligned or dropped".format(p_url, sizes),
            stacklevel=2,
        )

    # Raw strings keep regex escapes such as \s from being treated as
    # (invalid) string escapes; compiling once hoists the work out of the loop.
    digits_re = re.compile(r'([0-9]+)')
    price_re = re.compile(r'([0-9.]+)\s')
    control_re = re.compile('[\r\n\t]')

    s_lines = []
    for cod, prod, price, count, descr in zip(articles, titles, prices,
                                              counts, descrs):
        s_lines.append("{0};{1};{2};{3};{4}\n".format(
            digits_re.findall(cod)[0],
            ' '.join(prod.split()),  # collapse runs of whitespace in the title
            price_re.findall(price)[0],
            digits_re.findall(count)[0],
            control_re.sub('', descr.text_content()),
        ))
    return s_lines
#----------------------------------------------------------------------
def save_csv(p_filename, p_data):
    """Write pre-formatted lines to ``csv/<p_filename>.csv`` as UTF-8.

    The ``csv`` directory is created relative to the current working
    directory if it does not exist yet.

    Args:
        p_filename: Base name of the output file, without directory or
            extension.
        p_data: Iterable of strings; each one is written verbatim (no
            separator or newline is added).
    """
    import os  # local import keeps this block self-contained

    # Build the path portably: a literal 'csv\...' only works on Windows and
    # relies on '\{' being an unrecognised string escape.
    os.makedirs('csv', exist_ok=True)
    target = os.path.join('csv', '{0}.csv'.format(p_filename))

    # The context manager closes the file even if a write fails.
    with open(target, 'w', encoding='utf-8') as out:
        out.writelines(p_data)
#----------------------------------------------------------------------
def main():
    """Scrape every configured catalogue position and store it as CSV.

    For each entry of the position table the page URL is built from the
    numeric id, the page is handed to ``parse``, and the resulting lines are
    passed to ``save_csv`` under the name ``<key>_<number of lines>``.
    """
    # URL template; the placeholder receives the numeric position id.
    url = "http://www.url.ru/position~{0}.html"

    # Output name prefix -> numeric position id used in the URL.
    cameras = {
        'pos1': 62,
        'pos2': 69,
        'pos3': 157,
        'pos4': 44,
        'pos5': 19,
        'pos6': 2,
        'pos7': 45,
        'pos8': 11,
        'pos9': 1,
    }

    for label, position_id in cameras.items():
        rows = parse(url.format(position_id))
        out_name = "{0}_{1}".format(label, len(rows))
        save_csv(out_name, rows)
#----------------------------------------------------------------------
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()