Форум сайта python.su
Здравствуйте.
Мне нужно скачать файл со странички http://forums.thinkingwithportals.com/downloads.php?view=detail&df_id=470
Проблема в том, что прямой ссылки нет, а есть только кнопка «Download»:
<form action="./downloads.php" method="post" name="download" >
<input type="hidden" name="file_version" value="0" /> » <input type="submit" name="submit" value="Download" class="button1" />
<input type="hidden" name="hotlink_id" value="6fede94291739792ae7c734d60e20cc9" />
<input type="hidden" name="df_id" value="478" />
<input type="hidden" name="modcp" value="0" />
<input type="hidden" name="cat_id" value="10" />
<input type="hidden" name="hotlink_id" value="6fede94291739792ae7c734d60e20cc9" />
<input type="hidden" name="view" value="load" />
</form>
#----------------------------------------------------------------------
#
# Author: Laszlo Nagy
#
# Copyright: (c) 2005 by Szoftver Messias Bt.
# Licence: BSD style
#
#
#----------------------------------------------------------------------
import os
from hashlib import md5
import urllib2
import mimetypes
import cookielib
import logging
class MozillaCacher(object):
    """A dictionary-like object that caches values as files on disk.

    Every key is used as a file name inside ``cachedir``; the value stored
    under that key is the raw content of the file.
    """

    def __init__(self, cachedir='.cache'):
        """Remember the cache directory and create it when it is missing.

        @param cachedir: Directory that holds one file per cached key.
        """
        self.cachedir = cachedir
        # The logger has to exist before the first debug() call below;
        # without it a missing cache directory ended in AttributeError.
        self.logger = logging.getLogger("libs.MozillaCacher")
        if not os.path.isdir(cachedir):
            self.logger.debug('Create cache dir')
            os.mkdir(cachedir)

    def name2fname(self, name):
        """Return the path of the file that stores the value for name."""
        return os.path.join(self.cachedir, name)

    def __getitem__(self, name):
        """Return the cached bytes for name.

        @raise TypeError: name is not a str.
        @raise IndexError: nothing is cached under name.
        """
        if not isinstance(name, str):
            raise TypeError()
        fname = self.name2fname(name)
        if not os.path.isfile(fname):
            raise IndexError()
        # Close the handle explicitly instead of relying on garbage collection.
        f = open(fname, 'rb')
        try:
            return f.read()
        finally:
            f.close()

    def __setitem__(self, name, value):
        """Store value under name, replacing any previous content.

        @raise TypeError: name is not a str.
        """
        if not isinstance(name, str):
            raise TypeError()
        fname = self.name2fname(name)
        if os.path.isfile(fname):
            os.unlink(fname)
        f = open(fname, 'wb+')
        try:
            f.write(value)
        finally:
            f.close()

    def __delitem__(self, name):
        """Remove the cached value for name; a missing entry is ignored.

        @raise TypeError: name is not a str.
        """
        if not isinstance(name, str):
            raise TypeError()
        fname = self.name2fname(name)
        if os.path.isfile(fname):
            os.unlink(fname)

    def __iter__(self):
        """Iterating over the cache is not supported."""
        raise NotImplementedError()

    def has_key(self, name):
        """Return True when a value is cached under name."""
        return os.path.isfile(self.name2fname(name))

    def __contains__(self, name):
        """Support ``name in cache``.

        Without this method the ``in`` operator falls back to __iter__,
        which raises NotImplementedError.
        """
        return self.has_key(name)
class MozillaEmulator(object):
    """HTTP client that sends browser-like headers and keeps one cookie jar.

    Downloads can optionally be cached and retried on failure.
    """

    def __init__(self, cacher=None, trycount=0):
        """Create a new MozillaEmulator object.

        @param cacher: A dictionary like object, that can cache search results
            on a storage device. It must provide has_key(), __getitem__ and
            __setitem__. You can also put None here to disable caching
            completely.
        @param trycount: The download() method will retry the operation if it
            fails. You can specify -1 for infinite retrying. A value of 0
            means no retrying. A value of 1 means one retry. etc.
        """
        self.cacher = cacher
        self.cookies = cookielib.CookieJar()
        self.trycount = trycount
        self.debug = False
        self.logger = logging.getLogger("libs.MozillaEmulator")

    def _hash(self, data):
        """Return the hexadecimal MD5 digest of data (used as the cache key)."""
        # md5 only accepts byte strings; encode unicode input explicitly so
        # that non-ASCII URLs do not fail inside update().
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        h = md5()
        h.update(data)
        return h.hexdigest()

    def build_opener(self, url, postdata=None, extraheaders=None, forbid_redirect=False):
        """Build the request object and the opener used to fetch url.

        @param url: The URL to request.
        @param postdata: Request body; when it is not None the request
            becomes a POST.
        @param extraheaders: Dict of HTTP headers that are added to, or
            override, the default browser-like headers.
        @param forbid_redirect: Use HTTPNoRedirector instead of the standard
            redirect handler.
        @return: A (request, opener) tuple.
        """
        txheaders = {
            'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
            'Accept-Language': 'en,hu;q=0.8,en-us;q=0.5,hu-hu;q=0.3',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '300',
            'Connection': 'keep-alive',
        }
        if extraheaders is not None:
            txheaders.update(extraheaders)
        # The body is handed to Request directly; a separate add_data() call
        # would only set the same value a second time.
        req = urllib2.Request(url, postdata, txheaders)
        self.cookies.add_cookie_header(req)
        if forbid_redirect:
            redirector = HTTPNoRedirector()
        else:
            redirector = urllib2.HTTPRedirectHandler()
        http_handler = urllib2.HTTPHandler(debuglevel=self.debug)
        https_handler = urllib2.HTTPSHandler(debuglevel=self.debug)
        u = urllib2.build_opener(http_handler, https_handler,
                                 urllib2.HTTPCookieProcessor(self.cookies), redirector)
        u.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; hu-HU; rv:1.7.8) Gecko/20050511 Firefox/1.0.4')]
        return (req, u)

    def _read_response(self, response, length, fd, onprogress):
        """Read response in 1KB chunks, into fd when given, else into memory.

        @return: The whole body when fd is not given; otherwise the final
            (empty) chunk, since the data went into fd.
        """
        dlength = 0
        chunks = []
        while True:
            data = response.read(1024)
            if not data:
                break
            dlength += len(data)
            if fd:
                fd.write(data)
            else:
                chunks.append(data)
            if onprogress:
                onprogress(length, dlength)
        if fd:
            return data
        # Joining once avoids the quadratic cost of repeated concatenation.
        return b''.join(chunks)

    def download(self, url, postdata=None, extraheaders=None, forbid_redirect=False,
                 trycount=None, fd=None, onprogress=None, only_head=False, retry=True):
        """Download an URL with GET or POST methods.

        @param postdata: It can be a string that will be POST-ed to the URL.
            When None is given, the method will be GET instead.
        @param extraheaders: You can add/modify HTTP headers with a dict here.
        @param forbid_redirect: Set this flag if you do not want to handle
            HTTP 301 and 302 redirects.
        @param trycount: Specify the maximum number of retries here.
            0 means no retry on error. Using -1 means infinite retrying.
            None means the default value (that is self.trycount).
        @param fd: You can pass a file object here. In this case,
            the data will be written into the file. Please note that
            when you save the raw data into a file then it won't be cached.
        @param onprogress: A function that has two parameters:
            the size of the resource and the downloaded size. This will be
            called for each 1KB chunk. (If the HTTP header does not contain
            the content-length field, then the size parameter will be zero!)
        @param only_head: Open the URL and return the response object without
            reading its body.
        @param retry: When true (the default) the URL is always fetched; a
            cached copy is used only when this flag is false.
        @return: The raw HTML page data, unless fd was specified. When fd
            was given, the return value is undefined.
        @raise urllib2.URLError: The request failed more often than trycount
            allows.
        """
        if trycount is None:
            trycount = self.trycount
        cnt = 0
        while True:
            try:
                # Note: the cache key is derived from the URL only.
                key = self._hash(url)
                cached = (self.cacher is not None) and self.cacher.has_key(key)
                if cached and not retry:
                    return self.cacher[key]
                req, u = self.build_opener(url, postdata, extraheaders, forbid_redirect)
                try:
                    openerdirector = u.open(req)
                except urllib2.URLError:
                    # Log and re-raise so the retry logic below sees the
                    # failure; continuing would use an unbound response.
                    self.logger.exception("Can't open connection")
                    raise
                # %-style arguments: the status code is an int and cannot be
                # concatenated with a string.
                self.logger.debug('%s %s', req.get_method(), url)
                self.logger.debug('%s %s', openerdirector.code,
                                  getattr(openerdirector, 'msg', ''))
                self.logger.debug(openerdirector.headers)
                self.cookies.extract_cookies(openerdirector, req)
                if only_head:
                    return openerdirector
                if 'content-length' in openerdirector.headers:
                    length = int(openerdirector.headers['content-length'])
                else:
                    length = 0
                try:
                    data = self._read_response(openerdirector, length, fd, onprogress)
                finally:
                    openerdirector.close()
                if not fd and self.cacher is not None:
                    self.cacher[key] = data
                return data
            except urllib2.URLError:
                self.logger.debug("MozillaEmulator: urllib2.URLError, retryting %i ", cnt)
                cnt += 1
                if (trycount > -1) and (trycount < cnt):
                    raise

    def post_multipart(self, url, fields, files, fields2=None, forbid_redirect=False, onprogress=None):
        """Post fields and files to an http host as multipart/form-data.

        @param fields: Sequence of (name, value) elements for regular form
            fields.
        @param files: Sequence of (name, filename, value) elements for data
            to be uploaded as files.
        @param fields2: Optional extra argument forwarded to
            encode_multipart_formdata as its third parameter.
        @return: The server's response page.
        """
        # Forward fields2 only when the caller supplied it, so the default
        # call also works with a two-argument encode_multipart_formdata.
        if fields2 is None:
            content_type, post_data = encode_multipart_formdata(fields, files)
        else:
            content_type, post_data = encode_multipart_formdata(fields, files, fields2)
        result = self.download(
            url, post_data,
            {
                'Content-Type': content_type,
                'Content-Length': str(len(post_data)),
            },
            forbid_redirect=forbid_redirect, trycount=1, onprogress=onprogress,
        )
        return result
class HTTPNoRedirector(urllib2.HTTPRedirectHandler):
    """This is a custom http redirect handler that FORBIDS redirection.

    Instead of following a 301/302 response it raises urllib2.HTTPError.
    The raised error carries the redirect target in its ``newurl``
    attribute (None when the response named no target).
    """

    def http_error_302(self, req, fp, code, msg, headers):
        """Raise HTTPError for the redirect response instead of following it.

        @raise urllib2.HTTPError: Always. For codes 301 and 302 the error has
            a ``newurl`` attribute taken from the Location (or URI) header.
        """
        e = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
        if e.code in (301, 302):
            # Default to None so a redirect without a target header still
            # raises the HTTPError rather than an unbound-variable error.
            newurl = None
            if 'location' in headers:
                newurl = headers.get('location')
            elif 'uri' in headers:
                newurl = headers.get('uri')
            e.newurl = newurl
        raise e

    # The base class binds http_error_301 to its own http_error_302, so the
    # override above is not used for 301 unless the alias is repeated here.
    http_error_301 = http_error_302
def encode_multipart_formdata(fields, files, fields2=None):
    """Encode form fields and files as a multipart/form-data body.

    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    fields2 is an optional sequence of (name, value) elements that are
    emitted after the file parts. It exists because post_multipart() passes
    a third argument; NOTE(review): placing these after the files is an
    assumption, confirm against the servers this is used with.
    Return (content_type, body) ready for httplib.HTTP instance
    """
    BOUNDARY = '-----------------------------2385384675469946141908084969'
    CRLF = '\r\n'

    def field_part(key, value):
        # One regular form field: boundary, disposition, blank line, value.
        return [
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"' % key,
            '',
            value,
        ]

    L = []
    for (key, value) in fields:
        L.extend(field_part(key, value))
    for (key, filename, value) in files:
        L.append('--' + BOUNDARY)
        L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
        L.append('Content-Type: %s' % get_content_type(filename))
        L.append('')
        L.append(value)
    for (key, value) in (fields2 or ()):
        L.extend(field_part(key, value))
    # Closing boundary; the trailing empty element ends the body with CRLF.
    L.append('--' + BOUNDARY + '--')
    L.append('')
    body = CRLF.join(L)
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, body
def get_content_type(filename):
    """Return the MIME type guessed from filename.

    Falls back to 'application/octet-stream' when mimetypes cannot guess one.
    """
    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'
import re
import urllib

s = MozillaEmulator()
download_url = 'http://forums.thinkingwithportals.com/downloads.php'
url = download_url + '?view=detail&df_id=478'

# The Download button is an HTML form: "action", "method" and "name" are
# attributes of the <form> tag, not HTTP headers. The browser POSTs the
# form's input fields to the action URL, so the same is done here.
# The hidden fields are read from a freshly fetched page because the
# hotlink_id value differs between the two page dumps and is presumably
# issued per page view.
page = s.download(url)
form = re.search(r'<form action="\./downloads\.php" method="post" name="download"[^>]*>(.*?)</form>',
                 page, re.S)
if form is None:
    raise ValueError('download form not found on ' + url)
form_fields = re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"',
                         form.group(1))
form_fields.append(('submit', 'Download'))
postdata = urllib.urlencode(form_fields)

f = open(r'C:\ddd/gg.hz', 'wb')
try:
    # The server answers with a 302 to the real file; the standard redirect
    # handler follows it and the file content is written to f.
    s.download(download_url, postdata=postdata, fd=f)
finally:
    # Close the file even when the download fails.
    f.close()
Офлайн
sp3: Что значит «не получилось»? В чём конкретная проблема?
Но ввиду полного отсутствия знаний в этой области загрузить файл у меня не получилось :(
Офлайн
Поставил Tamper Data на Firefox.
Оказывается, кнопка перенаправляет на другой ресурс с прямой ссылкой для загрузки.
Сейчас проблема в том, как получить заголовки ответа на POST-запрос.
Запрос при нажатии кнопки на странице http://forums.thinkingwithportals.com/downloads.php?view=detail&df_id=470:
Host=forums.thinkingwithportals.com
User-Agent=Mozilla/5.0 (Windows; U; Windows NT 6.1; ru; rv:1.9.2.17) Gecko/20110420 Firefox/3.6.17
Accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language=ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3
Accept-Encoding=gzip,deflate
Accept-Charset=windows-1251,utf-8;q=0.7,*;q=0.7
Keep-Alive=115
Connection=keep-alive
Referer=http://forums.thinkingwithportals.com/downloads.php?view=detail&df_id=470
Cookie=phpbb3_hwh84_u=1; phpbb3_hwh84_k=; phpbb3_hwh84_sid=15adbcde4fb2a39c341f92d11b08e9af; __utma=234265473.673183499.1305960446.1305979668.1305984523.4; __utmz=234265473.1305960446.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmc=234265473; __utmb=234265473.1.10.1305984523
Content-Type=application/x-www-form-urlencoded
Content-Length=156
POSTDATA=file_version=0&submit=Download&hotlink_id=59d466de085c73c82f931842f08e3f7e&df_id=470&modcp=0&cat_id=10&hotlink_id=59d466de085c73c82f931842f08e3f7e&view=load
Status=Found - 302
Date=Sat, 21 May 2011 13:29:48 GMT
Server=Apache/2.2.9 (Debian) PHP/5.2.6-1+lenny10 with Suhosin-Patch
X-Powered-By=PHP/5.2.6-1+lenny10
Content-Disposition=attachment; filename="sp_enrichment_2.zip"
Location=http://mirror.pointysoftware.net/thinkingwithportals/downloads/portal2/maps/sp_enrichment_2.zip
Content-Length=0
Keep-Alive=timeout=15
Connection=Keep-Alive
Content-Type=application/octet-stream
import urllib2


class _NoRedirect(urllib2.HTTPRedirectHandler):
    """Redirect handler that refuses to follow any redirect."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # Returning None tells urllib2 not to build a follow-up request;
        # the 3xx response then surfaces as urllib2.HTTPError.
        return None


url = 'http://forums.thinkingwithportals.com/downloads.php?view=detail&df_id=470'
postdata='file_version=0&submit=Download&hotlink_id=59d466de085c73c82f931842f08e3f7e&df_id=470&modcp=0&cat_id=10&hotlink_id=59d466de085c73c82f931842f08e3f7e&view=load'
txheaders = {
    'Accept':'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Language':'en,hu;q=0.8,en-us;q=0.5,hu-hu;q=0.3',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Keep-Alive': '300',
    'Connection': 'keep-alive'}
req = urllib2.Request(url, postdata, txheaders)
# urllib2.urlopen() follows the 302 silently, so its headers belong to the
# final response. To see the headers of the POST response itself (Location,
# Content-Disposition), open the request without following redirects.
opener = urllib2.build_opener(_NoRedirect())
try:
    r = opener.open(req)
    headers = r.headers
except urllib2.HTTPError as e:
    # The unfollowed redirect arrives here; hdrs holds the response headers.
    headers = e.hdrs
print(headers)
Офлайн