[Python-checkins] r51416 - in python/trunk: Lib/test/test_urllib2.py Lib/urllib2.py Misc/NEWS

georg.brandl python-checkins at python.org
Sun Aug 20 15:15:40 CEST 2006


Author: georg.brandl
Date: Sun Aug 20 15:15:39 2006
New Revision: 51416

Modified:
   python/trunk/Lib/test/test_urllib2.py
   python/trunk/Lib/urllib2.py
   python/trunk/Misc/NEWS
Log:
Patch #1542948: fix urllib2 header casing issue. With new test.


Modified: python/trunk/Lib/test/test_urllib2.py
==============================================================================
--- python/trunk/Lib/test/test_urllib2.py	(original)
+++ python/trunk/Lib/test/test_urllib2.py	Sun Aug 20 15:15:39 2006
@@ -46,6 +46,69 @@
             self.assertEquals(urllib2.parse_http_list(string), list)
 
 
+def test_request_headers_dict():
+    """
+    The Request.headers dictionary is not a documented interface.  It should
+    stay that way, because the complete set of headers are only accessible
+    through the .get_header(), .has_header(), .header_items() interface.
+    However, .headers pre-dates those methods, and so real code will be using
+    the dictionary.
+
+    The introduction in 2.4 of those methods was a mistake for the same reason:
+    code that previously saw all (urllib2 user)-provided headers in .headers
+    now sees only a subset (and the function interface is ugly and incomplete).
+    A better change would have been to replace .headers dict with a dict
+    subclass (or UserDict.DictMixin instance?)  that preserved the .headers
+    interface and also provided access to the "unredirected" headers.  It's
+    probably too late to fix that, though.
+
+
+    Check .capitalize() case normalization:
+
+    >>> url = "http://example.com"
+    >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"]
+    'blah'
+    >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"]
+    'blah'
+
+    Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError,
+    but that could be changed in future.
+
+    """
+
+def test_request_headers_methods():
+    """
+    Note the case normalization of header names here, to .capitalize()-case.
+    This should be preserved for backwards-compatibility.  (In the HTTP case,
+    normalization to .title()-case is done by urllib2 before sending headers to
+    httplib).
+
+    >>> url = "http://example.com"
+    >>> r = Request(url, headers={"Spam-eggs": "blah"})
+    >>> r.has_header("Spam-eggs")
+    True
+    >>> r.header_items()
+    [('Spam-eggs', 'blah')]
+    >>> r.add_header("Foo-Bar", "baz")
+    >>> items = r.header_items()
+    >>> items.sort()
+    >>> items
+    [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')]
+
+    Note that e.g. r.has_header("spam-EggS") is currently False, and
+    r.get_header("spam-EggS") returns None, but that could be changed in
+    future.
+
+    >>> r.has_header("Not-there")
+    False
+    >>> print r.get_header("Not-there")
+    None
+    >>> r.get_header("Not-there", "default")
+    'default'
+
+    """
+
+
 def test_password_manager(self):
     """
     >>> mgr = urllib2.HTTPPasswordMgr()
@@ -676,11 +739,11 @@
             r = MockResponse(200, "OK", {}, "")
             newreq = h.do_request_(req)
             if data is None:  # GET
-                self.assert_("Content-Length" not in req.unredirected_hdrs)
-                self.assert_("Content-Type" not in req.unredirected_hdrs)
+                self.assert_("Content-length" not in req.unredirected_hdrs)
+                self.assert_("Content-type" not in req.unredirected_hdrs)
             else:  # POST
-                self.assertEqual(req.unredirected_hdrs["Content-Length"], "0")
-                self.assertEqual(req.unredirected_hdrs["Content-Type"],
+                self.assertEqual(req.unredirected_hdrs["Content-length"], "0")
+                self.assertEqual(req.unredirected_hdrs["Content-type"],
                              "application/x-www-form-urlencoded")
             # XXX the details of Host could be better tested
             self.assertEqual(req.unredirected_hdrs["Host"], "example.com")
@@ -692,8 +755,8 @@
             req.add_unredirected_header("Host", "baz")
             req.add_unredirected_header("Spam", "foo")
             newreq = h.do_request_(req)
-            self.assertEqual(req.unredirected_hdrs["Content-Length"], "foo")
-            self.assertEqual(req.unredirected_hdrs["Content-Type"], "bar")
+            self.assertEqual(req.unredirected_hdrs["Content-length"], "foo")
+            self.assertEqual(req.unredirected_hdrs["Content-type"], "bar")
             self.assertEqual(req.unredirected_hdrs["Host"], "baz")
             self.assertEqual(req.unredirected_hdrs["Spam"], "foo")
 
@@ -847,7 +910,7 @@
             407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
         opener.add_handler(auth_handler)
         opener.add_handler(http_handler)
-        self._test_basic_auth(opener, auth_handler, "Proxy-Authorization",
+        self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                               realm, http_handler, password_manager,
                               "http://acme.example.com:3128/protected",
                               "proxy.example.com:3128",

Modified: python/trunk/Lib/urllib2.py
==============================================================================
--- python/trunk/Lib/urllib2.py	(original)
+++ python/trunk/Lib/urllib2.py	Sun Aug 20 15:15:39 2006
@@ -263,11 +263,11 @@
 
     def add_header(self, key, val):
         # useful for something like authentication
-        self.headers[key.title()] = val
+        self.headers[key.capitalize()] = val
 
     def add_unredirected_header(self, key, val):
         # will not be added to a redirected request
-        self.unredirected_hdrs[key.title()] = val
+        self.unredirected_hdrs[key.capitalize()] = val
 
     def has_header(self, header_name):
         return (header_name in self.headers or
@@ -286,7 +286,7 @@
 class OpenerDirector:
     def __init__(self):
         client_version = "Python-urllib/%s" % __version__
-        self.addheaders = [('User-Agent', client_version)]
+        self.addheaders = [('User-agent', client_version)]
         # manage the individual handlers
         self.handlers = []
         self.handle_open = {}
@@ -675,7 +675,7 @@
         if user and password:
             user_pass = '%s:%s' % (unquote(user), unquote(password))
             creds = base64.encodestring(user_pass).strip()
-            req.add_header('Proxy-Authorization', 'Basic ' + creds)
+            req.add_header('Proxy-authorization', 'Basic ' + creds)
         hostport = unquote(hostport)
         req.set_proxy(hostport, proxy_type)
         if orig_type == proxy_type:
@@ -819,7 +819,7 @@
 
 class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
 
-    auth_header = 'Proxy-Authorization'
+    auth_header = 'Proxy-authorization'
 
     def http_error_407(self, req, fp, code, msg, headers):
         # http_error_auth_reqed requires that there is no userinfo component in
@@ -1022,20 +1022,20 @@
 
         if request.has_data():  # POST
             data = request.get_data()
-            if not request.has_header('Content-Type'):
+            if not request.has_header('Content-type'):
                 request.add_unredirected_header(
-                    'Content-Type',
+                    'Content-type',
                     'application/x-www-form-urlencoded')
-            if not request.has_header('Content-Length'):
+            if not request.has_header('Content-length'):
                 request.add_unredirected_header(
-                    'Content-Length', '%d' % len(data))
+                    'Content-length', '%d' % len(data))
 
         scheme, sel = splittype(request.get_selector())
         sel_host, sel_path = splithost(sel)
         if not request.has_header('Host'):
             request.add_unredirected_header('Host', sel_host or host)
         for name, value in self.parent.addheaders:
-            name = name.title()
+            name = name.capitalize()
             if not request.has_header(name):
                 request.add_unredirected_header(name, value)
 
@@ -1067,6 +1067,8 @@
         # So make sure the connection gets closed after the (only)
         # request.
         headers["Connection"] = "close"
+        headers = dict(
+            (name.title(), val) for name, val in headers.items())
         try:
             h.request(req.get_method(), req.get_selector(), req.data, headers)
             r = h.getresponse()
@@ -1217,7 +1219,7 @@
         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
         mtype = mimetypes.guess_type(file)[0]
         headers = mimetools.Message(StringIO(
-            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
+            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
             (mtype or 'text/plain', size, modified)))
         if host:
             host, port = splitport(host)
@@ -1272,9 +1274,9 @@
             headers = ""
             mtype = mimetypes.guess_type(req.get_full_url())[0]
             if mtype:
-                headers += "Content-Type: %s\n" % mtype
+                headers += "Content-type: %s\n" % mtype
             if retrlen is not None and retrlen >= 0:
-                headers += "Content-Length: %d\n" % retrlen
+                headers += "Content-length: %d\n" % retrlen
             sf = StringIO(headers)
             headers = mimetools.Message(sf)
             return addinfourl(fp, headers, req.get_full_url())

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun Aug 20 15:15:39 2006
@@ -304,8 +304,8 @@
 
 - Bug #978833: Really close underlying socket in _socketobject.close.
 
-- Bug #1459963: urllib and urllib2 now normalize HTTP header names correctly
-  with title().
+- Bug #1459963: urllib and urllib2 now normalize HTTP header names with
+  title().
 
 - Patch #1525766: In pkgutil.walk_packages, correctly pass the onerror callback
   to recursive calls and call it with the failing package name.


More information about the Python-checkins mailing list