Форум сайта python.su
Добрый вечер всем!
Подскажите, пожалуйста, как вывести список без лишних символов?
import urllib.request  # `import urllib` alone does not load the `request` submodule

from bs4 import BeautifulSoup

# Vacancy search page for "Logistics", restricted to postings with a salary.
# The query separator before `currency_code` is a plain `&`.
BASEURL = (
    'https://jobs.tut.by/search/vacancy?text=Logistics&enable_snippets=true'
    '&clusters=true&currency_code=BYR&area=16&only_with_salary=true'
    '&from=cluster_compensation'
)


def get_html(url):
    """Download *url* and return the raw response body."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        return response.read()


def parse(html):
    """Print the ``baseSalary`` <meta> tags of every salary cell in *html*."""
    soup = BeautifulSoup(html, "lxml")
    table = soup.find('table', class_='l l_auto')
    for row in table.find_all('div', class_='b-vacancy-list-salary'):
        cols = row.find_all('meta', itemprop='baseSalary')
        # Prints the whole list of matching tags, markup included,
        # e.g. [<meta content="1500" itemprop="baseSalary"/>].
        print(cols)


def main():
    """Fetch the search page and print the salary tags found on it."""
    parse(get_html(BASEURL))


if __name__ == '__main__':
    main()
[<meta content="1500" itemprop="baseSalary"/>] [<meta content="1750" itemprop="baseSalary"/>] [<meta content="2200" itemprop="baseSalary"/>] [<meta content="756" itemprop="baseSalary"/>] [<meta content="1000" itemprop="baseSalary"/>] [<meta content="600" itemprop="baseSalary"/>] [<meta content="1000" itemprop="baseSalary"/>] [<meta content="400" itemprop="baseSalary"/>] [<meta content="500" itemprop="baseSalary"/>] [<meta content="600" itemprop="baseSalary"/>] [<meta content="1000" itemprop="baseSalary"/>] [<meta content="500" itemprop="baseSalary"/>] [<meta content="700" itemprop="baseSalary"/>] [<meta content="1000" itemprop="baseSalary"/>] [<meta content="450" itemprop="baseSalary"/>] [<meta content="470" itemprop="baseSalary"/>] [<meta content="500" itemprop="baseSalary"/>] [<meta content="610" itemprop="baseSalary"/>] [<meta content="750" itemprop="baseSalary"/>] [<meta content="275" itemprop="baseSalary"/>]
Отредактировано Antonpython (Авг. 23, 2017 23:45:56)
Офлайн
# Take the last matching <meta> tag and print its attribute values
# instead of the tag list itself.
last_meta = cols[-1]
print(last_meta['content'], last_meta['itemprop'])
Офлайн
import requests
from lxml.html import fromstring

# The query separator before `currency_code` is a plain `&`.
URL = (
    "https://jobs.tut.by/search/vacancy?text=Logistics&enable_snippets=true&"
    "clusters=true&currency_code=BYR&area=16&only_with_salary=true&from=cluster_compensation"
)

# Browser-like User-Agent header sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0"
}

req = requests.get(URL, headers=headers)
doc = fromstring(req.content)

# The XPath selects the `content` attribute of every
# <meta itemprop="baseSalary">, so each result is already a plain string.
for meta in doc.xpath("//meta[@itemprop='baseSalary']/@content"):
    print(meta)
Офлайн
Спасибо Вам за рабочие варианты. В первом случае нужно убрать код после запятой:
# Only the `content` attribute of the last matching <meta> tag is printed.
salary = cols[-1]['content']
print(salary)
Офлайн