[code python]
import urllib.request
import re
# Download the page once; the context manager closes the HTTP response.
with urllib.request.urlopen("http://www.mail.ru/") as response:
    stranica = response.read()
    # Use the charset announced in the Content-Type header; fall back to
    # UTF-8 when the server does not declare one.
    charset = response.headers.get_content_charset() or "utf-8"

# Keep a copy of the raw bytes on disk, as before. This copy is not needed
# for parsing any more and can be dropped if the file itself is not wanted.
with open("Sot.html", "wb") as f:
    f.write(stranica)

# URL pattern; the inline (?#...) groups are regex comments naming each part.
# The pattern has no capturing groups, so re.findall returns whole matches.
# NOTE(review): in the Query part "[a-f\d{2}]" is a single-character class
# (the "{2}" sits inside the brackets); left unchanged to keep matches as-is.
r = r'(?#Protocol)(?:(?:ht|f)tp(?:s?)\:\/\/|~\/|\/)?(?#Username:Password)(?:\w+:\w+@)?(?#Subdomains)(?:(?:[-\w]+\.)+(?#TopLevel Domains)(?:com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum|travel|[a-z]{2}))(?#Port)(?::[\d]{1,5})?(?#Directories)(?:(?:(?:\/(?:[-\w~!$+|.,=]|%[a-f\d]{2})+)+|\/)+|\?|#)?(?#Query)(?:(?:\?(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)(?:&(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)*)*(?#Anchor)(?:#(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)?'

# Decode the downloaded bytes in memory instead of re-reading the saved file:
# this avoids the disk round-trip and the dependence on the locale's default
# encoding. errors="replace" keeps a stray bad byte from aborting the run.
txt = stranica.decode(charset, errors="replace")

# Print every URL-like match found in the page text.
for silki in re.findall(r, txt, re.U):
    print(silki)
[/code]
Подскажите, как парсить ссылки, не сохраняя страницу в файл?