[Jython-checkins] jython: Updated httplib, urllib, urllib2 to CPython 2.7.13 versions.

Fri Mar 3 13:18:28 EST 2017

https://hg.python.org/jython/rev/16b977e954b4
changeset:   8045:16b977e954b4
user:        Stefan Richthofer <stefan.richthofer at gmx.de>
date:        Fri Mar 03 19:17:26 2017 +0100
summary:
  Updated httplib, urllib, urllib2 to CPython 2.7.13 versions.

files:
  Lib/test/test_httplib.py            |  118 +++++++++++++++-
  Lib/urllib.py                       |    2 +-
  lib-python/2.7/httplib.py           |   35 +---
  lib-python/2.7/test/test_httplib.py |  118 +++++++++++++++-
  lib-python/2.7/urllib.py            |    2 +-
  5 files changed, 244 insertions(+), 31 deletions(-)

diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -241,6 +241,120 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_malformed_truncation(self):
+        # Other malformed header lines, especially without colons, used to
+        # cause the rest of the header section to be truncated
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'Public-Key-Pins: \n'
+            b'pin-sha256="xxx=";\n'
+            b'report-uri="https://..."\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Public-Key-Pins'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+    def test_blank_line_forms(self):
+        # Test that both CRLF and LF blank lines can terminate the header
+        # section and start the body
+        for blank in (b'\r\n', b'\n'):
+            resp = b'HTTP/1.1 200 OK\r\n' b'Transfer-Encoding: chunked\r\n'
+            resp += blank
+            resp += b'4\r\nbody\r\n0\r\n\r\n'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+            self.assertEqual(resp.read(), b'body')
+
+            resp = b'HTTP/1.0 200 OK\r\n' + blank + b'body'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.read(), b'body')
+
+        # A blank line ending in CR is not treated as the end of the HTTP
+        # header section, therefore header fields following it should be
+        # parsed if possible
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'\r'
+            b'Name: value\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+        # No header fields nor blank line
+        resp = b'HTTP/1.0 200 OK\r\n'
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'')
+
+    def test_from_line(self):
+        # The parser handles "From" lines specially, so test this does not
+        # affect parsing the rest of the header section
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'From start\r\n'
+            b' continued\r\n'
+            b'Name: value\r\n'
+            b'From middle\r\n'
+            b' continued\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'From end\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Name'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+        resp = (
+            b'HTTP/1.0 200 OK\r\n'
+            b'From alone\r\n'
+            b'\r\n'
+            b'body'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'body')
+
+    def test_parse_all_octets(self):
+        # Ensure no valid header field octet breaks the parser
+        body = (
+            b'HTTP/1.1 200 OK\r\n'
+            b"!#$%&'*+-.^_`|~: value\r\n"  # Special token characters
+            b'VCHAR: ' + bytearray(range(0x21, 0x7E + 1)) + b'\r\n'
+            b'obs-text: ' + bytearray(range(0x80, 0xFF + 1)) + b'\r\n'
+            b'obs-fold: text\r\n'
+            b' folded with space\r\n'
+            b'\tfolded with tab\r\n'
+            b'Content-Length: 0\r\n'
+            b'\r\n'
+        )
+        sock = FakeSocket(body)
+        resp = httplib.HTTPResponse(sock)
+        resp.begin()
+        self.assertEqual(resp.getheader('Content-Length'), '0')
+        self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
+        vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
+        self.assertEqual(resp.getheader('VCHAR'), vchar)
+        self.assertIsNotNone(resp.getheader('obs-text'))
+        folded = resp.getheader('obs-fold')
+        self.assertTrue(folded.startswith('text'))
+        self.assertIn(' folded with space', folded)
+        self.assertTrue(folded.endswith('folded with tab'))
+
     def test_invalid_headers(self):
         conn = httplib.HTTPConnection('example.com')
         conn.sock = FakeSocket('')
@@ -525,7 +639,7 @@
         self.assertTrue(hasattr(resp,'fileno'),
                 'HTTPResponse should expose a fileno attribute')
 
-    # Test lines overflowing the max line size (_MAXLINE in http.client)
+    # Test lines overflowing the max line size (_MAXLINE in httplib)
 
     def test_overflowing_status_line(self):
         self.skipTest("disabled for HTTP 0.9 support")
@@ -624,7 +738,7 @@
     def testHTTPSConnectionSourceAddress(self):
         self.conn = httplib.HTTPSConnection(HOST, self.port,
                 source_address=('', self.source_port))
-        # We don't test anything here other the constructor not barfing as
+        # We don't test anything here other than the constructor not barfing as
         # this code doesn't deal with setting up an active running SSL server
         # for an ssl_wrapped connect() to actually return from.
 
diff --git a/Lib/urllib.py b/Lib/urllib.py
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -142,7 +142,7 @@
         self.key_file = x509.get('key_file')
         self.cert_file = x509.get('cert_file')
         self.context = context
-        self.addheaders = [('User-Agent', self.version)]
+        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
         self.__tempfiles = []
         self.__unlink = os.unlink # See cleanup()
         self.tempcache = None
diff --git a/lib-python/2.7/httplib.py b/lib-python/2.7/httplib.py
--- a/lib-python/2.7/httplib.py
+++ b/lib-python/2.7/httplib.py
@@ -242,7 +242,7 @@
 #
 # VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
 
-# the patterns for both name and value are more leniant than RFC
+# the patterns for both name and value are more lenient than RFC
 # definitions to allow for backwards compatibility
 _is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
 _is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
@@ -273,9 +273,8 @@
 
         Read header lines up to the entirely blank line that terminates them.
         The (normally blank) line that ends the headers is skipped, but not
-        included in the returned list.  If a non-header line ends the headers,
-        (which is an error), an attempt is made to backspace over it; it is
-        never included in the returned list.
+        included in the returned list.  If an invalid line is found in the
+        header section, it is skipped, and further lines are processed.
 
         The variable self.status is set to the empty string if all went well,
         otherwise it is an error message.  The variable self.headers is a
@@ -302,19 +301,17 @@
         self.status = ''
         headerseen = ""
         firstline = 1
-        startofline = unread = tell = None
-        if hasattr(self.fp, 'unread'):
-            unread = self.fp.unread
-        elif self.seekable:
+        tell = None
+        if not hasattr(self.fp, 'unread') and self.seekable:
             tell = self.fp.tell
         while True:
             if len(hlist) > _MAXHEADERS:
                 raise HTTPException("got more than %d headers" % _MAXHEADERS)
             if tell:
                 try:
-                    startofline = tell()
+                    tell()
                 except IOError:
-                    startofline = tell = None
+                    tell = None
                     self.seekable = 0
             line = self.fp.readline(_MAXLINE + 1)
             if len(line) > _MAXLINE:
@@ -345,26 +342,14 @@
                 # It's a legal header line, save it.
                 hlist.append(line)
                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
-                continue
             elif headerseen is not None:
                 # An empty header name. These aren't allowed in HTTP, but it's
                 # probably a benign mistake. Don't add the header, just keep
                 # going.
-                continue
+                pass
             else:
-                # It's not a header line; throw it back and stop here.
-                if not self.dict:
-                    self.status = 'No headers'
-                else:
-                    self.status = 'Non-header line where header expected'
-                # Try to undo the read.
-                if unread:
-                    unread(line)
-                elif tell:
-                    self.fp.seek(startofline)
-                else:
-                    self.status = self.status + '; bad seek'
-                break
+                # It's not a header line; skip it and try the next line.
+                self.status = 'Non-header line where header expected'
 
 class HTTPResponse:
 
diff --git a/lib-python/2.7/test/test_httplib.py b/lib-python/2.7/test/test_httplib.py
--- a/lib-python/2.7/test/test_httplib.py
+++ b/lib-python/2.7/test/test_httplib.py
@@ -241,6 +241,120 @@
         self.assertEqual(resp.getheader('First'), 'val')
         self.assertEqual(resp.getheader('Second'), 'val')
 
+    def test_malformed_truncation(self):
+        # Other malformed header lines, especially without colons, used to
+        # cause the rest of the header section to be truncated
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'Public-Key-Pins: \n'
+            b'pin-sha256="xxx=";\n'
+            b'report-uri="https://..."\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Public-Key-Pins'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+    def test_blank_line_forms(self):
+        # Test that both CRLF and LF blank lines can terminate the header
+        # section and start the body
+        for blank in (b'\r\n', b'\n'):
+            resp = b'HTTP/1.1 200 OK\r\n' b'Transfer-Encoding: chunked\r\n'
+            resp += blank
+            resp += b'4\r\nbody\r\n0\r\n\r\n'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+            self.assertEqual(resp.read(), b'body')
+
+            resp = b'HTTP/1.0 200 OK\r\n' + blank + b'body'
+            resp = httplib.HTTPResponse(FakeSocket(resp))
+            resp.begin()
+            self.assertEqual(resp.read(), b'body')
+
+        # A blank line ending in CR is not treated as the end of the HTTP
+        # header section, therefore header fields following it should be
+        # parsed if possible
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'\r'
+            b'Name: value\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+        # No header fields nor blank line
+        resp = b'HTTP/1.0 200 OK\r\n'
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'')
+
+    def test_from_line(self):
+        # The parser handles "From" lines specially, so test this does not
+        # affect parsing the rest of the header section
+        resp = (
+            b'HTTP/1.1 200 OK\r\n'
+            b'From start\r\n'
+            b' continued\r\n'
+            b'Name: value\r\n'
+            b'From middle\r\n'
+            b' continued\r\n'
+            b'Transfer-Encoding: chunked\r\n'
+            b'From end\r\n'
+            b'\r\n'
+            b'4\r\nbody\r\n0\r\n\r\n'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertIsNotNone(resp.getheader('Name'))
+        self.assertEqual(resp.getheader('Transfer-Encoding'), 'chunked')
+        self.assertEqual(resp.read(), b'body')
+
+        resp = (
+            b'HTTP/1.0 200 OK\r\n'
+            b'From alone\r\n'
+            b'\r\n'
+            b'body'
+        )
+        resp = httplib.HTTPResponse(FakeSocket(resp))
+        resp.begin()
+        self.assertEqual(resp.read(), b'body')
+
+    def test_parse_all_octets(self):
+        # Ensure no valid header field octet breaks the parser
+        body = (
+            b'HTTP/1.1 200 OK\r\n'
+            b"!#$%&'*+-.^_`|~: value\r\n"  # Special token characters
+            b'VCHAR: ' + bytearray(range(0x21, 0x7E + 1)) + b'\r\n'
+            b'obs-text: ' + bytearray(range(0x80, 0xFF + 1)) + b'\r\n'
+            b'obs-fold: text\r\n'
+            b' folded with space\r\n'
+            b'\tfolded with tab\r\n'
+            b'Content-Length: 0\r\n'
+            b'\r\n'
+        )
+        sock = FakeSocket(body)
+        resp = httplib.HTTPResponse(sock)
+        resp.begin()
+        self.assertEqual(resp.getheader('Content-Length'), '0')
+        self.assertEqual(resp.getheader("!#$%&'*+-.^_`|~"), 'value')
+        vchar = ''.join(map(chr, range(0x21, 0x7E + 1)))
+        self.assertEqual(resp.getheader('VCHAR'), vchar)
+        self.assertIsNotNone(resp.getheader('obs-text'))
+        folded = resp.getheader('obs-fold')
+        self.assertTrue(folded.startswith('text'))
+        self.assertIn(' folded with space', folded)
+        self.assertTrue(folded.endswith('folded with tab'))
+
     def test_invalid_headers(self):
         conn = httplib.HTTPConnection('example.com')
         conn.sock = FakeSocket('')
@@ -525,7 +639,7 @@
         self.assertTrue(hasattr(resp,'fileno'),
                 'HTTPResponse should expose a fileno attribute')
 
-    # Test lines overflowing the max line size (_MAXLINE in http.client)
+    # Test lines overflowing the max line size (_MAXLINE in httplib)
 
     def test_overflowing_status_line(self):
         self.skipTest("disabled for HTTP 0.9 support")
@@ -624,7 +738,7 @@
     def testHTTPSConnectionSourceAddress(self):
         self.conn = httplib.HTTPSConnection(HOST, self.port,
                 source_address=('', self.source_port))
-        # We don't test anything here other the constructor not barfing as
+        # We don't test anything here other than the constructor not barfing as
         # this code doesn't deal with setting up an active running SSL server
         # for an ssl_wrapped connect() to actually return from.
 
diff --git a/lib-python/2.7/urllib.py b/lib-python/2.7/urllib.py
--- a/lib-python/2.7/urllib.py
+++ b/lib-python/2.7/urllib.py
@@ -138,7 +138,7 @@
         self.key_file = x509.get('key_file')
         self.cert_file = x509.get('cert_file')
         self.context = context
-        self.addheaders = [('User-Agent', self.version)]
+        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
         self.__tempfiles = []
         self.__unlink = os.unlink # See cleanup()
         self.tempcache = None

-- 
Repository URL: https://hg.python.org/jython