Форум сайта python.su
Пытаюсь с помощью регулярного выражения выдрать текст, находящийся под тегом
<nobr>
soup = BeautifulSoup(resArrow, 'lxml') tbl = soup.find('tbl', id='maintbl') for nobr in tbl.find_all('nobr', text=re.compile("MB")): # ничего не находится
<nobr>1895.35 MB<br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr>
print (nobr.getText())
1895.35 MB2,55 tlm
for nobr in tbl.find_all('nobr', text=re.compile(".*MB.*")):
for nobr in tbl.find_all('nobr', text=re.compile("\d*.\d\d\s+MB")):
for nobr in tbl.find_all('nobr', text="1895.35 MB2,55 tlm"):
for nobr in tbl.find_all('nobr', text="1895.35 MB"):
<nobr>
for nobr in tbl.find_all('nobr'): if "MB" in nobr.getText(): # так работает
Отредактировано The_Immortal (Сен. 9, 2020 20:40:33)
Офлайн
https://beautiful-soup-4.readthedocs.io/en/latest/index.html?highlight=find_all#find-all
>>> import bs4 >>> >>> text = """ ... <nobr><br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> ... <nobr>1895.35 MB<br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> ... <nobr><br/><a href="down.php?id=555"><b>2,56 tlm</b></a></nobr> ... <nobr><br/><a href="down.php?id=555"><b>2,57 tlm</b></a></nobr> ... <nobr>1896.36 MB<br/><a href="down.php?id=556"><b>2,58 tlm</b></a></nobr> ... """ >>> >>> soup = bs4.BeautifulSoup(text, 'html.parser') >>> >>> soup(lambda tag: tag.name == 'nobr' and 'MB' in tag.getText()) [<nobr>1895.35 MB<br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr>, <nobr>1896.36 MB<br/><a href="down.php?id=556"><b>2,58 tlm</b></a></nobr>] >>> >>> [i.getText() for i in soup(lambda tag: tag.name == 'nobr' and 'MB' in tag.getText())] ['1895.35 MB2,55 tlm', '1896.36 MB2,58 tlm'] >>>
The_Immortal, может быть, это ошибка в BeautifulSoup.
однако очень хочется разобраться, в чём может быть проблема
>>> import bs4 >>> >>> text = """ ... <nobr><br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> ... <nobr>1895.35 MB<br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> ... <nobr><br/><a href="down.php?id=555"><b>2,56 tlm</b></a></nobr> ... <nobr><br/><a href="down.php?id=555"><b>2,57 tlm</b></a></nobr> ... <nobr>1896.36 MB<br/><a href="down.php?id=556"><b>2,58 tlm</b></a></nobr> ... """ >>> >>> soup = bs4.BeautifulSoup(text, 'html.parser') >>> >>> soup('b', string=lambda i: '58' in i) [<b>2,58 tlm</b>] >>> >>> soup('nobr', string=lambda i: 'MB' in i) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/lib/python3.6/site-packages/bs4/element.py", line 1041, in __call__ return self.find_all(*args, **kwargs) File "/usr/lib/python3.6/site-packages/bs4/element.py", line 1313, in find_all return self._find_all(name, attrs, text, limit, generator, **kwargs) File "/usr/lib/python3.6/site-packages/bs4/element.py", line 556, in _find_all found = strainer.search(i) File "/usr/lib/python3.6/site-packages/bs4/element.py", line 1704, in search found = self.search_tag(markup) File "/usr/lib/python3.6/site-packages/bs4/element.py", line 1684, in search_tag if found and self.text and not self._matches(found.string, self.text): File "/usr/lib/python3.6/site-packages/bs4/element.py", line 1736, in _matches return match_against(markup) File "<stdin>", line 1, in <lambda> TypeError: argument of type 'NoneType' is not iterable >>>
Отредактировано py.user.next (Сен. 9, 2020 23:58:39)
Офлайн
import bs4 text = """ <nobr><br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> <nobr>1895.35 MB<br/><a href="down.php?id=555"><b>2,55 tlm</b></a></nobr> <nobr><br/><a href="down.php?id=555"><b>2,56 tlm</b></a></nobr> <nobr><br/><a href="down.php?id=555"><b>2,57 tlm</b></a></nobr> <nobr>1896.36 MB<br/><a href="down.php?id=556"><b>2,58 tlm</b></a></nobr> """ soup = bs4.BeautifulSoup(text, 'html.parser') print soup.find_all(lambda tag: 'MB' in tag.text)
Офлайн