[Python-checkins] CVS: python/dist/src/Lib urllib.py,1.104,1.105

Jeremy Hylton python-dev@python.org
Thu, 14 Sep 2000 09:59:10 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv4185

Modified Files:
	urllib.py 
Log Message:
Remove "," from the list of always_safe characters.  It is a reserved
character according to RFC 2396.  Add some text to the quote() docstring
that explains the quoting rules better.

This closes SF Bug #114427.

Add a _fast_quote helper that uses a dictionary lookup instead of a
linear scan of the safe-character string when the standard set of safe
characters is used.



Index: urllib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urllib.py,v
retrieving revision 1.104
retrieving revision 1.105
diff -C2 -r1.104 -r1.105
*** urllib.py	2000/08/31 15:48:09	1.104
--- urllib.py	2000/09/14 16:59:06	1.105
***************
*** 427,431 ****
          if dirs and not dirs[0]: dirs = dirs[1:]
          if dirs and not dirs[0]: dirs[0] = '/'
!         key = (user, host, port, string.joinfields(dirs, '/'))
          # XXX thread unsafe!
          if len(self.ftpcache) > MAXFTPCACHE:
--- 427,431 ----
          if dirs and not dirs[0]: dirs = dirs[1:]
          if dirs and not dirs[0]: dirs[0] = '/'
!         key = user, host, port, string.join(dirs, '/')
          # XXX thread unsafe!
          if len(self.ftpcache) > MAXFTPCACHE:
***************
*** 1014,1022 ****
  always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 
                 'abcdefghijklmnopqrstuvwxyz'
!                '0123456789' '_,.-')
  def quote(s, safe = '/'):
!     """quote('abc def') -> 'abc%20def'."""
!     # XXX Can speed this up an order of magnitude
      safe = always_safe + safe
      res = list(s)
      for i in range(len(res)):
--- 1014,1059 ----
  always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 
                 'abcdefghijklmnopqrstuvwxyz'
!                '0123456789' '_.-')
! 
! _fast_safe_test = always_safe + '/'
! _fast_safe = None
! 
! def _fast_quote(s):
!     global _fast_safe
!     if _fast_safe is None:
!         _fast_safe = {}
!         for c in _fast_safe_test:
!             _fast_safe[c] = c
!     res = list(s)
!     for i in range(len(res)):
!         c = res[i]
!         if not _fast_safe.has_key(c):
!             res[i] = '%%%02x' % ord(c)
!     return string.join(res, '')
! 
  def quote(s, safe = '/'):
!     """quote('abc def') -> 'abc%20def'
!     
!     Each part of a URL, e.g. the path info, the query, etc., has a
!     different set of reserved characters that must be quoted.
! 
!     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
!     the following reserved characters.
! 
!     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
!                   "$" | ","
! 
!     Each of these characters is reserved in some component of a URL,
!     but not necessarily in all of them.
! 
!     By default, the quote function is intended for quoting the path
!     section of a URL.  Thus, it will not encode '/'.  This character
!     is reserved, but in typical usage the quote function is being
!     called on a path where the existing slash characters are used as
!     reserved characters.
!     """
      safe = always_safe + safe
+     if _fast_safe_test == safe:
+         return _fast_quote(s)
      res = list(s)
      for i in range(len(res)):
***************
*** 1024,1033 ****
          if c not in safe:
              res[i] = '%%%02x' % ord(c)
!     return string.joinfields(res, '')
  
! def quote_plus(s, safe = '/'):
!     # XXX Can speed this up an order of magnitude
      if ' ' in s:
-         # replace ' ' with '+'
          l = string.split(s, ' ')
          for i in range(len(l)):
--- 1061,1069 ----
          if c not in safe:
              res[i] = '%%%02x' % ord(c)
!     return string.join(res, '')
  
! def quote_plus(s, safe = ''):
!     """Quote the query fragment of a URL; replacing ' ' with '+'"""
      if ' ' in s:
          l = string.split(s, ' ')
          for i in range(len(l)):