import re

# Sample markup: three <p> elements (the first wraps a <b>) interleaved with
# <path> elements, which share the "<p" prefix and must not be captured.
html = "<p class='one'><b>111</b></p><p name='two'>222</p><path>333</path><p>444</p><path class='three'>555</path>"

# Collect the inner markup of every <p> element with one pattern.
#   <p            literal tag name
#   (?:\s[^>]*)?  optional attribute section; it must start with whitespace,
#                 which is what rejects "<path ...>", and it cannot run past
#                 the closing ">" of the opening tag
#   >(.*?)</p>    shortest possible body up to the next closing tag
# A single pass keeps results in document order and cannot capture the same
# tag twice (two separate patterns would both match a tag such as "<p >").
# The raw string avoids the invalid "\s" escape in a normal string literal.
tempTag = re.findall(r"<p(?:\s[^>]*)?>(.*?)</p>", html)
print(tempTag)
import re

# Sample markup: three <p> elements (the first wraps a <b>) interleaved with
# <path> elements, which share the "<p" prefix and must not be captured.
html = "<p class='one'><b>111</b></p><p name='two'>222</p><path>333</path><p>444</p><path class='three'>555</path>"

# Collect the inner markup of every <p> element with one pattern.
#   <p            literal tag name
#   (?:\s[^>]*)?  optional attribute section; it must start with whitespace,
#                 which is what rejects "<path ...>", and it cannot run past
#                 the closing ">" of the opening tag
#   >(.*?)</p>    shortest possible body up to the next closing tag
# A single pass keeps results in document order and cannot capture the same
# tag twice (two separate patterns would both match a tag such as "<p >").
# The raw string avoids the invalid "\s" escape in a normal string literal.
tempTag = re.findall(r"<p(?:\s[^>]*)?>(.*?)</p>", html)
print(tempTag)
from lxml import html

# Sample markup: <p> elements (the first wraps a <b>) mixed with <path>
# elements that should be ignored.
htm = "<p class='one'><b>111</b></p><p name='two'>222</p><path>333</path><p>444</p><path class='three'>555</path>"

# Parse the fragment into an element tree.
tree = html.fromstring(htm)

# Select every <p> plus every <b> that is a direct child of a <p>.
# The XPath name test "p" matches the element name exactly, so <path> is
# not selected.
tags = tree.xpath('//p/b | //p')

# Keep each selected element's own leading text, dropping elements whose
# .text is None or empty (e.g. a <p> whose content sits entirely inside a
# child element).
out = list(filter(None, (node.text for node in tags)))
print(out)