[Python-checkins] cpython (3.2): 3.2 - Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn

senthil.kumaran python-checkins at python.org
Wed Apr 11 20:37:45 CEST 2012


http://hg.python.org/cpython/rev/fc001124a3ee
changeset:   76248:fc001124a3ee
branch:      3.2
parent:      76245:9d4109af8f3b
user:        Senthil Kumaran <senthil at uthcode.com>
date:        Thu Apr 12 02:34:32 2012 +0800
summary:
  3.2 - Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn Linderman. Refactor code and tests

files:
  Lib/http/server.py           |  54 ++++++++++++-----------
  Lib/test/test_httpservers.py |  53 ++++++++++++-----------
  2 files changed, 56 insertions(+), 51 deletions(-)


diff --git a/Lib/http/server.py b/Lib/http/server.py
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -825,44 +825,47 @@
 
 # Utilities for CGIHTTPRequestHandler
 
-# TODO(gregory.p.smith): Move this into an appropriate library.
-def _url_collapse_path_split(path):
+def _url_collapse_path(path):
     """
     Given a URL path, remove extra '/'s and '.' path elements and collapse
-    any '..' references.
+    any '..' references and returns a colllapsed path.
 
     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+    The utility of this function is limited to is_cgi method and helps
+    preventing some security attacks.
 
     Returns: A tuple of (head, tail) where tail is everything after the final /
     and head is everything before it.  Head will always start with a '/' and,
     if it contains anything else, never have a trailing '/'.
 
     Raises: IndexError if too many '..' occur within the path.
+
     """
     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
     # path semantics rather than local operating system semantics.
-    path_parts = []
-    for part in path.split('/'):
-        if part == '.':
-            path_parts.append('')
-        else:
-            path_parts.append(part)
-    # Filter out blank non trailing parts before consuming the '..'.
-    path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
+    path_parts = path.split('/')
+    head_parts = []
+    for part in path_parts[:-1]:
+        if part == '..':
+            head_parts.pop() # IndexError if more '..' than prior parts
+        elif part and part != '.':
+            head_parts.append( part )
     if path_parts:
         tail_part = path_parts.pop()
+        if tail_part:
+            if tail_part == '..':
+                head_parts.pop()
+                tail_part = ''
+            elif tail_part == '.':
+                tail_part = ''
     else:
         tail_part = ''
-    head_parts = []
-    for part in path_parts:
-        if part == '..':
-            head_parts.pop()
-        else:
-            head_parts.append(part)
-    if tail_part and tail_part == '..':
-        head_parts.pop()
-        tail_part = ''
-    return ('/' + '/'.join(head_parts), tail_part)
+
+    splitpath = ('/' + '/'.join(head_parts), tail_part)
+    collapsed_path = "/".join(splitpath)
+
+    return collapsed_path
+
 
 
 nobody = None
@@ -943,16 +946,15 @@
         (and the next character is a '/' or the end of the string).
 
         """
-
-        splitpath = _url_collapse_path_split(self.path)
-        joined_path = '/'.join(splitpath)
-        dir_sep = joined_path.find('/',1)
-        head, tail = joined_path[:dir_sep], joined_path[dir_sep+1:]
+        collapsed_path = _url_collapse_path(self.path)
+        dir_sep = collapsed_path.find('/', 1)
+        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
         if head in self.cgi_directories:
             self.cgi_info = head, tail
             return True
         return False
 
+
     cgi_directories = ['/cgi-bin', '/htbin']
 
     def is_executable(self, path):
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -366,41 +366,44 @@
         finally:
             BaseTestCase.tearDown(self)
 
-    def test_url_collapse_path_split(self):
+    def test_url_collapse_path(self):
+        # verify tail is the last portion and head is the rest on proper urls
         test_vectors = {
-            '': ('/', ''),
+            '': '//',
             '..': IndexError,
             '/.//..': IndexError,
-            '/': ('/', ''),
-            '//': ('/', ''),
-            '/\\': ('/', '\\'),
-            '/.//': ('/', ''),
-            'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
-            '/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
-            'a': ('/', 'a'),
-            '/a': ('/', 'a'),
-            '//a': ('/', 'a'),
-            './a': ('/', 'a'),
-            './C:/': ('/C:', ''),
-            '/a/b': ('/a', 'b'),
-            '/a/b/': ('/a/b', ''),
-            '/a/b/c/..': ('/a/b', ''),
-            '/a/b/c/../d': ('/a/b', 'd'),
-            '/a/b/c/../d/e/../f': ('/a/b/d', 'f'),
-            '/a/b/c/../d/e/../../f': ('/a/b', 'f'),
-            '/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'),
+            '/': '//',
+            '//': '//',
+            '/\\': '//\\',
+            '/.//': '//',
+            'cgi-bin/file1.py': '/cgi-bin/file1.py',
+            '/cgi-bin/file1.py': '/cgi-bin/file1.py',
+            'a': '//a',
+            '/a': '//a',
+            '//a': '//a',
+            './a': '//a',
+            './C:/': '/C:/',
+            '/a/b': '/a/b',
+            '/a/b/': '/a/b/',
+            '/a/b/.': '/a/b/',
+            '/a/b/c/..': '/a/b/',
+            '/a/b/c/../d': '/a/b/d',
+            '/a/b/c/../d/e/../f': '/a/b/d/f',
+            '/a/b/c/../d/e/../../f': '/a/b/f',
+            '/a/b/c/../d/e/.././././..//f': '/a/b/f',
             '../a/b/c/../d/e/.././././..//f': IndexError,
-            '/a/b/c/../d/e/../../../f': ('/a', 'f'),
-            '/a/b/c/../d/e/../../../../f': ('/', 'f'),
+            '/a/b/c/../d/e/../../../f': '/a/f',
+            '/a/b/c/../d/e/../../../../f': '//f',
             '/a/b/c/../d/e/../../../../../f': IndexError,
-            '/a/b/c/../d/e/../../../../f/..': ('/', ''),
+            '/a/b/c/../d/e/../../../../f/..': '//',
+            '/a/b/c/../d/e/../../../../f/../.': '//',
         }
         for path, expected in test_vectors.items():
             if isinstance(expected, type) and issubclass(expected, Exception):
                 self.assertRaises(expected,
-                                  server._url_collapse_path_split, path)
+                                  server._url_collapse_path, path)
             else:
-                actual = server._url_collapse_path_split(path)
+                actual = server._url_collapse_path(path)
                 self.assertEqual(expected, actual,
                                  msg='path = %r\nGot:    %r\nWanted: %r' %
                                  (path, actual, expected))

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list