[Python-checkins] r70638 - in python/branches/py3k: Doc/library/http.client.rst Lib/http/client.py Lib/test/test_httplib.py

jeremy.hylton python-checkins at python.org
Fri Mar 27 21:24:35 CET 2009


Author: jeremy.hylton
Date: Fri Mar 27 21:24:34 2009
New Revision: 70638

Log:
Fix some string encoding issues with entity bodies in HTTP requests.

RFC 2616 says that iso-8859-1 is the default charset for HTTP entity
bodies, but we encoded strings using ascii.  See
http://bugs.python.org/issue5314.  Changed docs and code to use
iso-8859-1.

Also fix some brokenness with passing a file as the body instead of a
string.

Add tests to show that some of this behavior actually works.



Modified:
   python/branches/py3k/Doc/library/http.client.rst
   python/branches/py3k/Lib/http/client.py
   python/branches/py3k/Lib/test/test_httplib.py

Modified: python/branches/py3k/Doc/library/http.client.rst
==============================================================================
--- python/branches/py3k/Doc/library/http.client.rst	(original)
+++ python/branches/py3k/Doc/library/http.client.rst	Fri Mar 27 21:24:34 2009
@@ -351,14 +351,22 @@
 
 .. method:: HTTPConnection.request(method, url[, body[, headers]])
 
-   This will send a request to the server using the HTTP request method *method*
-   and the selector *url*.  If the *body* argument is present, it should be a
-   string of data to send after the headers are finished. Alternatively, it may
-   be an open file object, in which case the contents of the file is sent; this
-   file object should support ``fileno()`` and ``read()`` methods. The header
-   Content-Length is automatically set to the correct value. The *headers*
-   argument should be a mapping of extra HTTP headers to send with the request.
+   This will send a request to the server using the HTTP request
+   method *method* and the selector *url*.  If the *body* argument is
+   present, it should be a string or bytes object of data to send after
+   the headers are finished.  Strings are encoded as ISO-8859-1, the
+   default charset for HTTP.  To use other encodings, pass a bytes
+   object.  The Content-Length header is set to the length of the
+   string.
+
+   The *body* may also be an open file object, in which case the
+   contents of the file are sent; this file object should support
+   ``fileno()`` and ``read()`` methods. The header Content-Length is
+   automatically set to the length of the file as reported by
+   stat.
 
+   The *headers* argument should be a mapping of extra HTTP
+   headers to send with the request.
 
 .. method:: HTTPConnection.getresponse()
 

Modified: python/branches/py3k/Lib/http/client.py
==============================================================================
--- python/branches/py3k/Lib/http/client.py	(original)
+++ python/branches/py3k/Lib/http/client.py	Fri Mar 27 21:24:34 2009
@@ -243,7 +243,6 @@
         if line in (b'\r\n', b'\n', b''):
             break
     hstring = b''.join(headers).decode('iso-8859-1')
-
     return email.parser.Parser(_class=_class).parsestr(hstring)
 
 class HTTPResponse(io.RawIOBase):
@@ -675,13 +674,22 @@
         if self.debuglevel > 0:
             print("send:", repr(str))
         try:
-            blocksize=8192
-            if hasattr(str,'read') :
-                if self.debuglevel > 0: print("sendIng a read()able")
-                data=str.read(blocksize)
-                while data:
+            blocksize = 8192
+            if hasattr(str, "read") :
+                if self.debuglevel > 0:
+                    print("sendIng a read()able")
+                encode = False
+                if "b" not in str.mode:
+                    encode = True
+                    if self.debuglevel > 0:
+                        print("encoding file using iso-8859-1")
+                while 1:
+                    data = str.read(blocksize)
+                    if not data:
+                        break
+                    if encode:
+                        data = data.encode("iso-8859-1")
                     self.sock.sendall(data)
-                    data=str.read(blocksize)
             else:
                 self.sock.sendall(str)
         except socket.error as v:
@@ -713,8 +721,8 @@
             message_body = None
         self.send(msg)
         if message_body is not None:
-            #message_body was not a string (i.e. it is a file) and
-            #we must run the risk of Nagle
+            # message_body was not a string (i.e. it is a file), and
+            # we must run the risk of Nagle.
             self.send(message_body)
 
     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
@@ -904,7 +912,9 @@
         for hdr, value in headers.items():
             self.putheader(hdr, value)
         if isinstance(body, str):
-            body = body.encode('ascii')
+            # RFC 2616 Section 3.7.1 says that "text" media types have a
+            # default charset of iso-8859-1.
+            body = body.encode('iso-8859-1')
         self.endheaders(body)
 
     def getresponse(self):

Modified: python/branches/py3k/Lib/test/test_httplib.py
==============================================================================
--- python/branches/py3k/Lib/test/test_httplib.py	(original)
+++ python/branches/py3k/Lib/test/test_httplib.py	Fri Mar 27 21:24:34 2009
@@ -272,9 +272,80 @@
             h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
             self.assertEqual(h.timeout, 30)
 
+class RequestBodyTest(TestCase):
+    """Test cases where a request includes a message body."""
+
+    def setUp(self):
+        self.conn = httplib.HTTPConnection('example.com')
+        self.sock = FakeSocket("")
+        self.conn.sock = self.sock
+
+    def get_headers_and_fp(self):
+        f = io.BytesIO(self.sock.data)
+        f.readline()  # read the request line
+        message = httplib.parse_headers(f)
+        return message, f
+
+    def test_manual_content_length(self):
+        # Set an incorrect content-length so that we can verify that
+        # it will not be over-ridden by the library.
+        self.conn.request("PUT", "/url", "body",
+                          {"Content-Length": "42"})
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("42", message.get("content-length"))
+        self.assertEqual(4, len(f.read()))
+
+    def test_ascii_body(self):
+        self.conn.request("PUT", "/url", "body")
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("text/plain", message.get_content_type())
+        self.assertEqual(None, message.get_charset())
+        self.assertEqual("4", message.get("content-length"))
+        self.assertEqual(b'body', f.read())
+
+    def test_latin1_body(self):
+        self.conn.request("PUT", "/url", "body\xc1")
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("text/plain", message.get_content_type())
+        self.assertEqual(None, message.get_charset())
+        self.assertEqual("5", message.get("content-length"))
+        self.assertEqual(b'body\xc1', f.read())
+
+    def test_bytes_body(self):
+        self.conn.request("PUT", "/url", b"body\xc1")
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("text/plain", message.get_content_type())
+        self.assertEqual(None, message.get_charset())
+        self.assertEqual("5", message.get("content-length"))
+        self.assertEqual(b'body\xc1', f.read())
+
+    def test_file_body(self):
+        f = open(support.TESTFN, "w")
+        f.write("body")
+        f.close()
+        f = open(support.TESTFN)
+        self.conn.request("PUT", "/url", f)
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("text/plain", message.get_content_type())
+        self.assertEqual(None, message.get_charset())
+        self.assertEqual("4", message.get("content-length"))
+        self.assertEqual(b'body', f.read())
+
+    def test_binary_file_body(self):
+        f = open(support.TESTFN, "wb")
+        f.write(b"body\xc1")
+        f.close()
+        f = open(support.TESTFN, "rb")
+        self.conn.request("PUT", "/url", f)
+        message, f = self.get_headers_and_fp()
+        self.assertEqual("text/plain", message.get_content_type())
+        self.assertEqual(None, message.get_charset())
+        self.assertEqual("5", message.get("content-length"))
+        self.assertEqual(b'body\xc1', f.read())
+
 def test_main(verbose=None):
     support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
-                         HTTPSTimeoutTest)
+                         HTTPSTimeoutTest, RequestBodyTest)
 
 if __name__ == '__main__':
     test_main()


More information about the Python-checkins mailing list