[Python-checkins] python/dist/src/Lib httplib.py,1.34.2.3,1.34.2.4 urlparse.py,1.29,1.29.4.1

Tue, 02 Jul 2002 13:42:52 -0700

Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv14132/Lib

Modified Files:
      Tag: release21-maint
	httplib.py urlparse.py 
Log Message:
Backport various bug fixes from trunk.

The 2.1 maintenance branch is now identical to the trunk through rev
1.54 of httplib.py.

Index: httplib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/httplib.py,v
retrieving revision 1.34.2.3
retrieving revision 1.34.2.4
diff -C2 -d -r1.34.2.3 -r1.34.2.4
*** httplib.py	9 Apr 2002 00:39:10 -0000	1.34.2.3
--- httplib.py	2 Jul 2002 20:42:50 -0000	1.34.2.4
***************
*** 67,72 ****
  """

! import socket
  import mimetools

  try:
--- 67,74 ----
  """

! import errno
  import mimetools
+ import socket
+ from urlparse import urlsplit

  try:
***************
*** 77,84 ****
  __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
             "HTTPException", "NotConnected", "UnknownProtocol",
!            "UnknownTransferEncoding", "IllegalKeywordArgument",
!            "UnimplementedFileMode", "IncompleteRead",
!            "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader",
!            "ResponseNotReady", "BadStatusLine", "error"]

  HTTP_PORT = 80
--- 79,86 ----
  __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
             "HTTPException", "NotConnected", "UnknownProtocol",
!            "UnknownTransferEncoding", "UnimplementedFileMode",
!            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
!            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
!            "BadStatusLine", "error"]

  HTTP_PORT = 80
***************
*** 110,118 ****
          self.will_close = _UNKNOWN      # conn will close at end of response

!     def begin(self):
!         if self.msg is not None:
!             # we've already started reading the response
!             return
! 
          line = self.fp.readline()
          if self.debuglevel > 0:
--- 112,116 ----
          self.will_close = _UNKNOWN      # conn will close at end of response

!     def _read_status(self):
          line = self.fp.readline()
          if self.debuglevel > 0:
***************
*** 134,144 ****
          # The status code is a three-digit number
          try:
!             self.status = status = int(status)
              if status < 100 or status > 999:
                  raise BadStatusLine(line)
          except ValueError:
              raise BadStatusLine(line)
!         self.reason = reason.strip()

          if version == 'HTTP/1.0':
              self.version = 10
--- 132,162 ----
          # The status code is a three-digit number
          try:
!             status = int(status)
              if status < 100 or status > 999:
                  raise BadStatusLine(line)
          except ValueError:
              raise BadStatusLine(line)
!         return version, status, reason
! 
!     def _begin(self):
!         if self.msg is not None:
!             # we've already started reading the response
!             return

+         # read until we get a non-100 response
+         while 1:
+             version, status, reason = self._read_status()
+             if status != 100:
+                 break
+             # skip the header from the 100 response
+             while 1:
+                 skip = self.fp.readline().strip()
+                 if not skip:
+                     break
+                 if self.debuglevel > 0:
+                     print "header:", skip
+             
+         self.status = status
+         self.reason = reason.strip()
          if version == 'HTTP/1.0':
              self.version = 10
***************
*** 151,154 ****
--- 169,173 ----

          if self.version == 9:
+             self.chunked = 0
              self.msg = mimetools.Message(StringIO())
              return
***************
*** 232,235 ****
--- 251,255 ----

          if self.chunked:
+             assert self.chunked != _UNKNOWN
              chunk_left = self.chunk_left
              value = ''
***************
*** 346,350 ****
              i = host.find(':')
              if i >= 0:
!                 port = int(host[i+1:])
                  host = host[:i]
              else:
--- 366,373 ----
              i = host.find(':')
              if i >= 0:
!                 try:
!                     port = int(host[i+1:])
!                 except ValueError:
!                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
                  host = host[:i]
              else:
***************
*** 395,399 ****
              raise

!     def putrequest(self, method, url):
          """Send a request to the server.

--- 418,422 ----
              raise

!     def putrequest(self, method, url, skip_host=0):
          """Send a request to the server.

***************
*** 446,461 ****
              # Issue some standard headers for better HTTP/1.1 compliance

!             # this header is issued *only* for HTTP/1.1 connections. more
!             # specifically, this means it is only issued when the client uses
!             # the new HTTPConnection() class. backwards-compat clients will
!             # be using HTTP/1.0 and those clients may be issuing this header
!             # themselves. we should NOT issue it twice; some web servers (such
!             # as Apache) barf when they see two Host: headers

!             # if we need a non-standard port,include it in the header
!             if self.port == HTTP_PORT:
!                 self.putheader('Host', self.host)
!             else:
!                 self.putheader('Host', "%s:%s" % (self.host, self.port))

              # note: we are assuming that clients will not attempt to set these
--- 469,497 ----
              # Issue some standard headers for better HTTP/1.1 compliance

!             if not skip_host:
!                 # this header is issued *only* for HTTP/1.1
!                 # connections. more specifically, this means it is
!                 # only issued when the client uses the new
!                 # HTTPConnection() class. backwards-compat clients
!                 # will be using HTTP/1.0 and those clients may be
!                 # issuing this header themselves. we should NOT issue
!                 # it twice; some web servers (such as Apache) barf
!                 # when they see two Host: headers

!                 # If we need a non-standard port, include it in the
!                 # header.  If the request is going through a proxy,
!                 # use the host of the actual URL, not the host of the
!                 # proxy.
! 
!                 netloc = ''
!                 if url.startswith('http'):
!                     nil, netloc, nil, nil, nil = urlsplit(url)
! 
!                 if netloc:
!                     self.putheader('Host', netloc)
!                 elif self.port == HTTP_PORT:
!                     self.putheader('Host', self.host)
!                 else:
!                     self.putheader('Host', "%s:%s" % (self.host, self.port))

              # note: we are assuming that clients will not attempt to set these
***************
*** 515,519 ****

      def _send_request(self, method, url, body, headers):
!         self.putrequest(method, url)

          if body:
--- 551,562 ----

      def _send_request(self, method, url, body, headers):
!         # If headers already contains a Host header, pass the optional
!         # skip_host argument to putrequest().  The check is harder
!         # because field names are case insensitive.
!         if 'Host' in (headers
!             or [k for k in headers.iterkeys() if k.lower() == "host"]):
!             self.putrequest(method, url, skip_host=1)
!         else:
!             self.putrequest(method, url)

          if body:
***************
*** 557,561 ****
              response = self.response_class(self.sock)

!         response.begin()
          self.__state = _CS_IDLE

--- 600,605 ----
              response = self.response_class(self.sock)

!         response._begin()
!         assert response.will_close != _UNKNOWN
          self.__state = _CS_IDLE

***************
*** 569,572 ****
--- 613,693 ----
          return response

+ class SSLFile:
+     """File-like object wrapping an SSL socket."""
+ 
+     BUFSIZE = 8192
+     
+     def __init__(self, sock, ssl, bufsize=None):
+         self._sock = sock
+         self._ssl = ssl
+         self._buf = ''
+         self._bufsize = bufsize or self.__class__.BUFSIZE
+ 
+     def _read(self):
+         buf = ''
+         # put in a loop so that we retry on transient errors
+         while 1:
+             try:
+                 buf = self._ssl.read(self._bufsize)
+             except socket.sslerror, err:
+                 if (err[0] == socket.SSL_ERROR_WANT_READ
+                     or err[0] == socket.SSL_ERROR_WANT_WRITE):
+                     continue
+                 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
+                     or err[0] == socket.SSL_ERROR_EOF):
+                     break
+                 raise
+             except socket.error, err:
+                 if err[0] == errno.EINTR:
+                     continue
+                 if err[0] == errno.EBADF:
+                     # XXX socket was closed?
+                     break
+                 raise
+             else:
+                 break
+         return buf
+ 
+     def read(self, size=None):
+         L = [self._buf]
+         avail = len(self._buf)
+         while size is None or avail < size:
+             s = self._read()
+             if s == '':
+                 break
+             L.append(s)
+             avail += len(s)
+         all = "".join(L)
+         if size is None:
+             self._buf = ''
+             return all
+         else:
+             self._buf = all[size:]
+             return all[:size]
+ 
+     def readline(self):
+         L = [self._buf]
+         self._buf = ''
+         while 1:
+             i = L[-1].find("\n")
+             if i >= 0:
+                 break
+             s = self._read()
+             if s == '':
+                 break
+             L.append(s)
+         if i == -1:
+             # loop exited because there is no more data
+             return "".join(L)
+         else:
+             all = "".join(L)
+             # XXX could do enough bookkeeping not to do a 2nd search
+             i = all.find("\n") + 1
+             line = all[:i]
+             self._buf = all[i:]
+             return line
+ 
+     def close(self):
+         self._sock.close()

  class FakeSocket:
***************
*** 576,600 ****

      def makefile(self, mode, bufsize=None):
-         """Return a readable file-like object with data from socket.
- 
-         This method offers only partial support for the makefile
-         interface of a real socket.  It only supports modes 'r' and
-         'rb' and the bufsize argument is ignored.
- 
-         The returned object contains *all* of the file data
-         """
          if mode != 'r' and mode != 'rb':
              raise UnimplementedFileMode()
! 
!         msgbuf = []
!         while 1:
!             try:
!                 buf = self.__ssl.read()
!             except socket.sslerror, msg:
!                 break
!             if buf == '':
!                 break
!             msgbuf.append(buf)
!         return StringIO("".join(msgbuf))

      def send(self, stuff, flags = 0):
--- 697,703 ----

      def makefile(self, mode, bufsize=None):
          if mode != 'r' and mode != 'rb':
              raise UnimplementedFileMode()
!         return SSLFile(self.__sock, self.__ssl, bufsize)

      def send(self, stuff, flags = 0):
***************
*** 616,634 ****
      default_port = HTTPS_PORT

!     def __init__(self, host, port=None, **x509):
!         keys = x509.keys()
!         try:
!             keys.remove('key_file')
!         except ValueError:
!             pass
!         try:
!             keys.remove('cert_file')
!         except ValueError:
!             pass
!         if keys:
!             raise IllegalKeywordArgument()
          HTTPConnection.__init__(self, host, port)
!         self.key_file = x509.get('key_file')
!         self.cert_file = x509.get('cert_file')

      def connect(self):
--- 719,726 ----
      default_port = HTTPS_PORT

!     def __init__(self, host, port=None, key_file=None, cert_file=None):
          HTTPConnection.__init__(self, host, port)
!         self.key_file = key_file
!         self.cert_file = cert_file

      def connect(self):
***************
*** 654,658 ****
      _connection_class = HTTPConnection

!     def __init__(self, host='', port=None, **x509):
          "Provide a default host, since the superclass requires one."

--- 746,750 ----
      _connection_class = HTTPConnection

!     def __init__(self, host='', port=None):
          "Provide a default host, since the superclass requires one."

***************
*** 664,679 ****
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._conn = self._connection_class(host, port)
          # set up delegation to flesh out interface
!         self.send = self._conn.send
!         self.putrequest = self._conn.putrequest
!         self.endheaders = self._conn.endheaders
!         self._conn._http_vsn = self._http_vsn
!         self._conn._http_vsn_str = self._http_vsn_str

!         # we never actually use these for anything, but we keep them here for
!         # compatibility with post-1.5.2 CVS.
!         self.key_file = x509.get('key_file')
!         self.cert_file = x509.get('cert_file')

          self.file = None
--- 756,772 ----
          # an error when we attempt to connect. Presumably, the client code
          # will call connect before then, with a proper host.
!         self._setup(self._connection_class(host, port))
! 
!     def _setup(self, conn):
!         self._conn = conn
! 
          # set up delegation to flesh out interface
!         self.send = conn.send
!         self.putrequest = conn.putrequest
!         self.endheaders = conn.endheaders
!         self.set_debuglevel = conn.set_debuglevel

!         conn._http_vsn = self._http_vsn
!         conn._http_vsn_str = self._http_vsn_str

          self.file = None
***************
*** 686,692 ****
          self._conn.connect()

-     def set_debuglevel(self, debuglevel):
-         self._conn.set_debuglevel(debuglevel)
- 
      def getfile(self):
          "Provide a getfile, since the superclass' does not use this concept."
--- 779,782 ----
***************
*** 746,749 ****
--- 836,852 ----
          _connection_class = HTTPSConnection

+         def __init__(self, host='', port=None, **x509):
+             # provide a default host, pass the X509 cert info
+ 
+             # urf. compensate for bad input.
+             if port == 0:
+                 port = None
+             self._setup(self._connection_class(host, port, **x509))
+ 
+             # we never actually use these for anything, but we keep them
+             # here for compatibility with post-1.5.2 CVS.
+             self.key_file = x509.get('key_file')
+             self.cert_file = x509.get('cert_file')
+ 

  class HTTPException(Exception):
***************
*** 753,756 ****
--- 856,862 ----
      pass

+ class InvalidURL(HTTPException):
+     pass
+ 
  class UnknownProtocol(HTTPException):
      def __init__(self, version):
***************
*** 760,766 ****
      pass

- class IllegalKeywordArgument(HTTPException):
-     pass
- 
  class UnimplementedFileMode(HTTPException):
      pass
--- 866,869 ----
***************
*** 823,827 ****
          for header in headers.headers: print header.strip()
      print
!     print h.getfile().read()

      if hasattr(socket, 'ssl'):
--- 926,941 ----
          for header in headers.headers: print header.strip()
      print
!     print "read", len(h.getfile().read())
! 
!     # minimal test that code to extract host from url works
!     class HTTP11(HTTP):
!         _http_vsn = 11
!         _http_vsn_str = 'HTTP/1.1'
! 
!     h = HTTP11('www.python.org')
!     h.putrequest('GET', 'http://www.python.org/~jeremy/')
!     h.endheaders()
!     h.getreply()
!     h.close()

      if hasattr(socket, 'ssl'):
***************
*** 833,836 ****
--- 947,951 ----
          hs.endheaders()
          status, reason, headers = hs.getreply()
+         # XXX why does this give a 302 response?
          print 'status =', status
          print 'reason =', reason
***************
*** 839,843 ****
              for header in headers.headers: print header.strip()
          print
!         print hs.getfile().read()

--- 954,958 ----
              for header in headers.headers: print header.strip()
          print
!         print "read", len(hs.getfile().read())

Index: urlparse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urlparse.py,v
retrieving revision 1.29
retrieving revision 1.29.4.1
diff -C2 -d -r1.29 -r1.29.4.1
*** urlparse.py	1 Mar 2001 04:27:19 -0000	1.29
--- urlparse.py	2 Jul 2002 20:42:50 -0000	1.29.4.1
***************
*** 44,48 ****

! def urlparse(url, scheme = '', allow_fragments = 1):
      """Parse a URL into 6 components:
      <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
--- 44,48 ----

! def urlparse(url, scheme='', allow_fragments=1):
      """Parse a URL into 6 components:
      <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
***************
*** 50,53 ****
--- 50,76 ----
      Note that we don't break the components up in smaller bits
      (e.g. netloc is a single string) and we don't expand % escapes."""
+     tuple = urlsplit(url, scheme, allow_fragments)
+     scheme, netloc, url, query, fragment = tuple
+     if scheme in uses_params and ';' in url:
+         url, params = _splitparams(url)
+     else:
+         params = ''
+     return scheme, netloc, url, params, query, fragment
+ 
+ def _splitparams(url):
+     if '/'  in url:
+         i = url.find(';', url.rfind('/'))
+         if i < 0:
+             return url, ''
+     else:
+         i = url.find(';')
+     return url[:i], url[i+1:]
+ 
+ def urlsplit(url, scheme='', allow_fragments=1):
+     """Parse a URL into 5 components:
+     <scheme>://<netloc>/<path>?<query>#<fragment>
+     Return a 5-tuple: (scheme, netloc, path, query, fragment).
+     Note that we don't break the components up in smaller bits
+     (e.g. netloc is a single string) and we don't expand % escapes."""
      key = url, scheme, allow_fragments
      cached = _parse_cache.get(key, None)
***************
*** 56,60 ****
      if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
          clear_cache()
!     netloc = path = params = query = fragment = ''
      i = url.find(':')
      if i > 0:
--- 79,83 ----
      if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
          clear_cache()
!     netloc = query = fragment = ''
      i = url.find(':')
      if i > 0:
***************
*** 65,85 ****
                  i = url.find('/', 2)
                  if i < 0:
!                     i = len(url)
                  netloc = url[2:i]
                  url = url[i:]
!             if allow_fragments:
!                 i = url.rfind('#')
!                 if i >= 0:
!                     fragment = url[i+1:]
!                     url = url[:i]
!             i = url.find('?')
!             if i >= 0:
!                 query = url[i+1:]
!                 url = url[:i]
!             i = url.find(';')
!             if i >= 0:
!                 params = url[i+1:]
!                 url = url[:i]
!             tuple = scheme, netloc, url, params, query, fragment
              _parse_cache[key] = tuple
              return tuple
--- 88,101 ----
                  i = url.find('/', 2)
                  if i < 0:
!                     i = url.find('#')
!                     if i < 0:
!                         i = len(url)
                  netloc = url[2:i]
                  url = url[i:]
!             if allow_fragments and '#' in url:
!                 url, fragment = url.split('#', 1)
!             if '?' in url:
!                 url, query = url.split('?', 1)
!             tuple = scheme, netloc, url, query, fragment
              _parse_cache[key] = tuple
              return tuple
***************
*** 95,111 ****
                  i = len(url)
              netloc, url = url[2:i], url[i:]
!     if allow_fragments and scheme in uses_fragment:
!         i = url.rfind('#')
!         if i >= 0:
!             url, fragment = url[:i], url[i+1:]
!     if scheme in uses_query:
!         i = url.find('?')
!         if i >= 0:
!             url, query = url[:i], url[i+1:]
!     if scheme in uses_params:
!         i = url.find(';')
!         if i >= 0:
!             url, params = url[:i], url[i+1:]
!     tuple = scheme, netloc, url, params, query, fragment
      _parse_cache[key] = tuple
      return tuple
--- 111,119 ----
                  i = len(url)
              netloc, url = url[2:i], url[i:]
!     if allow_fragments and scheme in uses_fragment and '#' in url:
!         url, fragment = url.split('#', 1)
!     if scheme in uses_query and '?' in url:
!         url, query = url.split('?', 1)
!     tuple = scheme, netloc, url, query, fragment
      _parse_cache[key] = tuple
      return tuple
***************
*** 116,119 ****
--- 124,132 ----
      originally had redundant delimiters, e.g. a ? with an empty query
      (the draft states that these are equivalent)."""
+     if params:
+         url = "%s;%s" % (url, params)
+     return urlunsplit((scheme, netloc, url, query, fragment))
+ 
+ def urlunsplit((scheme, netloc, url, query, fragment)):
      if netloc or (scheme in uses_netloc and url[:2] == '//'):
          if url and url[:1] != '/': url = '/' + url
***************
*** 121,126 ****
      if scheme:
          url = scheme + ':' + url
-     if params:
-         url = url + ';' + params
      if query:
          url = url + '?' + query
--- 134,137 ----
***************
*** 188,194 ****
      empty string.
      """
!     s, n, p, a, q, frag = urlparse(url)
!     defrag = urlunparse((s, n, p, a, q, ''))
!     return defrag, frag

--- 199,208 ----
      empty string.
      """
!     if '#' in url:
!         s, n, p, a, q, frag = urlparse(url)
!         defrag = urlunparse((s, n, p, a, q, ''))
!         return defrag, frag
!     else:
!         return url, ''