# Extract and print the <title> of a fetched HTML page.
#
# `[^>]*` allows optional attributes inside the opening tag. The previous
# pattern `<title*>` only meant "<titl" followed by any number of "e".
re_title = re.compile(r"<title[^>]*>(.+?)</title>", re.IGNORECASE | re.DOTALL)

...  # the code that produces `content` is elided in the original snippet

# `content` arrives as bytes; str(content) would return its repr
# ("b'...\\xd0\\xaf...'") with every non-ASCII byte escaped, which is why the
# title was printed as \xd0\xaf... instead of readable text. Decode it using
# the charset the page declares (UTF-8) to get real characters.
if isinstance(content, (bytes, bytearray)):
    s = content.decode("utf-8", errors="replace")
else:
    s = str(content)

print(s[0:1000])
print(re_title.findall(s))

# search() returns None when the page has no <title>; guard against it
# instead of failing with AttributeError on .group().
title_match = re_title.search(s)
print(title_match.group(1) if title_match else None)
—
вывод:
b'<!DOCTYPE html><html class="i-ua_js_no i-ua_css_standart i-ua_browser_unknown i-ua_pseudo_yes" lang="ru"><head xmlns:og="http://ogp.me/ns#"><meta http-equiv="X-UA-Compatible" content="IE=edge"><title>\xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81</title><meta http-equiv=Content-Type content="text/html;charset=UTF-8"><link rel="apple-touch-icon" href="//yastatic.net/morda-logo/i/apple-touch-icon/ru-76x76.png" sizes="76x76"><link rel="apple-touch-icon" href="//yastatic.net/morda-logo/i/apple-touch-icon/ru-120x120.png" sizes="120x120"><link rel="apple-touch-icon" href="//yastatic.net/morda-logo/i/apple-touch-icon/ru-152x152.png" sizes="152x152"><link rel="apple-touch-icon" href="//yastatic.net/morda-logo/i/apple-touch-icon/ru-180x180.png" sizes="180x180"><link rel="alternate" type="application/rss+xml" title="\xd0\x9d\xd0\xbe\xd0\xb2\xd0\xbe\xd1\x81\xd1\x82\xd0\xb8 \xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81\xd0\xb0" href="//company.yandex.ru/news/news.rss"><link rel="alternate" \xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81