[Python-checkins] python/dist/src/Lib urllib.py, 1.165.2.1, 1.165.2.2
rhettinger@users.sourceforge.net
rhettinger at users.sourceforge.net
Thu Sep 15 07:28:28 CEST 2005
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31892
Modified Files:
Tag: release24-maint
urllib.py
Log Message:
Sync up with patches applied to the head.
Includes the fix for SF 1016880 (urllib.urlretrieve silently truncates
downloads) and the performance fix-ups.
Index: urllib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib.py,v
retrieving revision 1.165.2.1
retrieving revision 1.165.2.2
diff -u -d -r1.165.2.1 -r1.165.2.2
--- urllib.py 26 Aug 2005 08:51:39 -0000 1.165.2.1
+++ urllib.py 15 Sep 2005 05:28:25 -0000 1.165.2.2
@@ -86,6 +86,11 @@
if _urlopener:
_urlopener.cleanup()
+# exception raised when downloaded size does not match content-length
+class ContentTooShortError(IOError):
+ def __init__(self, message, content):
+ IOError.__init__(self, message)
+ self.content = content
ftpcache = {}
class URLopener:
@@ -228,6 +233,7 @@
self.tempcache[url] = result
bs = 1024*8
size = -1
+ read = 0
blocknum = 0
if reporthook:
if "content-length" in headers:
@@ -237,6 +243,7 @@
block = fp.read(bs)
if block == "":
break
+ read += len(block)
tfp.write(block)
blocknum += 1
if reporthook:
@@ -245,6 +252,12 @@
tfp.close()
del fp
del tfp
+
+ # raise exception if actual size does not match content-length header
+ if size >= 0 and read < size:
+ raise ContentTooShortError("retrieval incomplete: got only %i out "
+ "of %i bytes" % (read, size), result)
+
return result
# Each method named open_<type> knows how to open that type of URL
@@ -409,7 +422,11 @@
def open_local_file(self, url):
"""Use local file."""
- import mimetypes, mimetools, email.Utils, StringIO
+ import mimetypes, mimetools, email.Utils
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
host, file = splithost(url)
localname = url2pathname(file)
try:
@@ -419,7 +436,7 @@
size = stats.st_size
modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(url)[0]
- headers = mimetools.Message(StringIO.StringIO(
+ headers = mimetools.Message(StringIO(
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified)))
if not host:
@@ -440,7 +457,11 @@
def open_ftp(self, url):
"""Use FTP protocol."""
- import mimetypes, mimetools, StringIO
+ import mimetypes, mimetools
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
host, path = splithost(url)
if not host: raise IOError, ('ftp error', 'no host given')
host, port = splitport(host)
@@ -489,7 +510,7 @@
headers += "Content-Type: %s\n" % mtype
if retrlen is not None and retrlen >= 0:
headers += "Content-Length: %d\n" % retrlen
- headers = mimetools.Message(StringIO.StringIO(headers))
+ headers = mimetools.Message(StringIO(headers))
return addinfourl(fp, headers, "ftp:" + url)
except ftperrors(), msg:
raise IOError, ('ftp error', msg), sys.exc_info()[2]
@@ -503,7 +524,11 @@
# mediatype := [ type "/" subtype ] *( ";" parameter )
# data := *urlchar
# parameter := attribute "=" value
- import StringIO, mimetools
+ import mimetools
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
try:
[type, data] = url.split(',', 1)
except ValueError:
@@ -529,7 +554,7 @@
msg.append('')
msg.append(data)
msg = '\n'.join(msg)
- f = StringIO.StringIO(msg)
+ f = StringIO(msg)
headers = mimetools.Message(f, 0)
f.fileno = None # needed for addinfourl
return addinfourl(f, headers, url)
@@ -696,8 +721,11 @@
global _noheaders
if _noheaders is None:
import mimetools
- import StringIO
- _noheaders = mimetools.Message(StringIO.StringIO(), 0)
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
+ _noheaders = mimetools.Message(StringIO(), 0)
_noheaders.fp.close() # Recycle file descriptor
return _noheaders
@@ -1021,23 +1049,18 @@
return selector[1], selector[2:]
return None, selector
+_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
- mychr = chr
- myatoi = int
- list = s.split('%')
- res = [list[0]]
- myappend = res.append
- del list[0]
- for item in list:
- if item[1:2]:
- try:
- myappend(mychr(myatoi(item[:2], 16))
- + item[2:])
- except ValueError:
- myappend('%' + item)
- else:
- myappend('%' + item)
+ res = s.split('%')
+ for i in xrange(1, len(res)):
+ item = res[i]
+ try:
+ res[i] = _hextochr[item[:2]] + item[2:]
+ except KeyError:
+ res[i] = '%' + item
return "".join(res)
def unquote_plus(s):
@@ -1048,22 +1071,7 @@
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
-
-_fast_safe_test = always_safe + '/'
-_fast_safe = None
-
-def _fast_quote(s):
- global _fast_safe
- if _fast_safe is None:
- _fast_safe = {}
- for c in _fast_safe_test:
- _fast_safe[c] = c
- res = list(s)
- for i in range(len(res)):
- c = res[i]
- if not c in _fast_safe:
- res[i] = '%%%02X' % ord(c)
- return ''.join(res)
+_safemaps = {}
def quote(s, safe = '/'):
"""quote('abc def') -> 'abc%20def'
@@ -1086,25 +1094,25 @@
called on a path where the existing slash characters are used as
reserved characters.
"""
- safe = always_safe + safe
- if _fast_safe_test == safe:
- return _fast_quote(s)
- res = list(s)
- for i in range(len(res)):
- c = res[i]
- if c not in safe:
- res[i] = '%%%02X' % ord(c)
+ cachekey = (safe, always_safe)
+ try:
+ safe_map = _safemaps[cachekey]
+ except KeyError:
+ safe += always_safe
+ safe_map = {}
+ for i in range(256):
+ c = chr(i)
+ safe_map[c] = (c in safe) and c or ('%%%02X' % i)
+ _safemaps[cachekey] = safe_map
+ res = map(safe_map.__getitem__, s)
return ''.join(res)
def quote_plus(s, safe = ''):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
- l = s.split(' ')
- for i in range(len(l)):
- l[i] = quote(l[i], safe)
- return '+'.join(l)
- else:
- return quote(s, safe)
+ s = quote(s, safe + ' ')
+ return s.replace(' ', '+')
+ return quote(s, safe)
def urlencode(query,doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
More information about the Python-checkins mailing list