import re

# NOTE: `str` shadows the builtin type. The name is kept because the other
# snippets in this file pass `str` to re.findall.
str = '<a href="1.ru">11111</a>, <a href="2.ru">222222</a>'

# Greedy `.*` extends to the last `>` and the last `</a>` in the input, so
# both anchors come back as a single match instead of two.
print(re.findall(r'<a.*>.*</a>', str))
# ['<a href="1.ru">11111</a>, <a href="2.ru">222222</a>']
import re

# NOTE: `str` shadows the builtin type. The name is kept because the other
# snippets in this file pass `str` to re.findall.
str = '<a href="1.ru">11111</a>, <a href="2.ru">222222</a>'

# Greedy `.*` extends to the last `>` and the last `</a>` in the input, so
# the whole string is returned as one match rather than one match per anchor.
print(re.findall(r'<a.*>.*</a>', str))
# ['<a href="1.ru">11111</a>, <a href="2.ru">222222</a>']
# Negated character classes stop at the first `>` (end of the opening tag) and
# the first `<` (start of the closing tag), so each anchor is matched on its
# own. Anchors whose text contains nested tags are not matched by this pattern.
re.compile(r'<a[^>]*>[^<]*</a>').findall(str)
# Lazy quantifiers (`.*?`) stop at the first `>` and the first `</a>`, so each
# anchor is returned separately. `\b` after the tag name keeps the pattern from
# starting a match at other tags that merely begin with "a" (e.g. <abbr>,
# <article>), which the unanchored `<a.*?>` would accept.
re.findall(r'<a\b.*?>.*?</a>', str)
# Match each <a ...>text</a> separately: `[^>]*` cannot run past the end of the
# opening tag and `[^<]*` cannot run past the start of the closing tag.
re.findall(
    pattern=r'<a[^>]*>[^<]*</a>',
    string=str,
)
import lxml.html

s = """<a href="1.ru">11111</a>, <a href="2.ru">222222</a>"""

# Parse the markup instead of pattern-matching it, then collect an
# (href, text) pair for every <a> child of the parsed root element.
html = lxml.html.fromstring(s)
data = [
    (node.attrib['href'], node.text)
    for node in html.iterchildren('a')
]
print(data)
# [('1.ru', '11111'), ('2.ru', '222222')]