[Python-checkins] cpython (2.7): Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn

senthil.kumaran python-checkins at python.org
Wed Apr 11 20:37:42 CEST 2012


http://hg.python.org/cpython/rev/c67efb8ffca4
changeset:   76247:c67efb8ffca4
branch:      2.7
parent:      76225:4d49a2415ced
user:        Senthil Kumaran <senthil at uthcode.com>
date:        Thu Apr 12 02:23:23 2012 +0800
summary:
  Issue 10484 - Incorporate improvements to CGI module - Suggested by Glenn Linderman. Refactor code and tests

files:
  Lib/CGIHTTPServer.py         |  51 ++++++++--------
  Lib/test/test_httpservers.py |  74 ++++++++++++-----------
  2 files changed, 64 insertions(+), 61 deletions(-)


diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py
--- a/Lib/CGIHTTPServer.py
+++ b/Lib/CGIHTTPServer.py
@@ -84,10 +84,9 @@
         path begins with one of the strings in self.cgi_directories
         (and the next character is a '/' or the end of the string).
         """
-        splitpath = _url_collapse_path_split(self.path)
-        joined_path = '/'.join(splitpath)
-        dir_sep = joined_path.find('/', 1)
-        head, tail = joined_path[:dir_sep], joined_path[dir_sep+1:]
+        collapsed_path = _url_collapse_path(self.path)
+        dir_sep = collapsed_path.find('/', 1)
+        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
         if head in self.cgi_directories:
             self.cgi_info = head, tail
             return True
@@ -301,44 +300,46 @@
                 self.log_message("CGI script exited OK")
 
 
-# TODO(gregory.p.smith): Move this into an appropriate library.
-def _url_collapse_path_split(path):
+def _url_collapse_path(path):
     """
     Given a URL path, remove extra '/'s and '.' path elements and collapse
-    any '..' references.
+    any '..' references and returns a colllapsed path.
 
     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
+    The utility of this function is limited to is_cgi method and helps
+    preventing some security attacks.
 
     Returns: A tuple of (head, tail) where tail is everything after the final /
     and head is everything before it.  Head will always start with a '/' and,
     if it contains anything else, never have a trailing '/'.
 
     Raises: IndexError if too many '..' occur within the path.
+
     """
     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
     # path semantics rather than local operating system semantics.
-    path_parts = []
-    for part in path.split('/'):
-        if part == '.':
-            path_parts.append('')
-        else:
-            path_parts.append(part)
-    # Filter out blank non trailing parts before consuming the '..'.
-    path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
+    path_parts = path.split('/')
+    head_parts = []
+    for part in path_parts[:-1]:
+        if part == '..':
+            head_parts.pop() # IndexError if more '..' than prior parts
+        elif part and part != '.':
+            head_parts.append( part )
     if path_parts:
         tail_part = path_parts.pop()
+        if tail_part:
+            if tail_part == '..':
+                head_parts.pop()
+                tail_part = ''
+            elif tail_part == '.':
+                tail_part = ''
     else:
         tail_part = ''
-    head_parts = []
-    for part in path_parts:
-        if part == '..':
-            head_parts.pop()
-        else:
-            head_parts.append(part)
-    if tail_part and tail_part == '..':
-        head_parts.pop()
-        tail_part = ''
-    return ('/' + '/'.join(head_parts), tail_part)
+
+    splitpath = ('/' + '/'.join(head_parts), tail_part)
+    collapsed_path = "/".join(splitpath)
+
+    return collapsed_path
 
 
 nobody = None
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -4,11 +4,6 @@
 Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest.
 """
 
-from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
-from SimpleHTTPServer import SimpleHTTPRequestHandler
-from CGIHTTPServer import CGIHTTPRequestHandler
-import CGIHTTPServer
-
 import os
 import sys
 import re
@@ -17,12 +12,17 @@
 import urllib
 import httplib
 import tempfile
+import unittest
+import CGIHTTPServer
 
-import unittest
 
+from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+from CGIHTTPServer import CGIHTTPRequestHandler
 from StringIO import StringIO
+from test import test_support
 
-from test import test_support
+
 threading = test_support.import_module('threading')
 
 
@@ -43,7 +43,7 @@
         self.end_headers()
         self.wfile.write(b'<html><body>Data</body></html>\r\n')
 
-    def log_message(self, format, *args):
+    def log_message(self, fmt, *args):
         pass
 
 
@@ -97,9 +97,9 @@
         self.handler = SocketlessRequestHandler()
 
     def send_typical_request(self, message):
-        input = StringIO(message)
+        input_msg = StringIO(message)
         output = StringIO()
-        self.handler.rfile = input
+        self.handler.rfile = input_msg
         self.handler.wfile = output
         self.handler.handle_one_request()
         output.seek(0)
@@ -296,7 +296,7 @@
             os.chdir(self.cwd)
             try:
                 shutil.rmtree(self.tempdir)
-            except:
+            except OSError:
                 pass
         finally:
             BaseTestCase.tearDown(self)
@@ -418,42 +418,44 @@
         finally:
             BaseTestCase.tearDown(self)
 
-    def test_url_collapse_path_split(self):
+    def test_url_collapse_path(self):
         # verify tail is the last portion and head is the rest on proper urls
         test_vectors = {
-            '': ('/', ''),
+            '': '//',
             '..': IndexError,
             '/.//..': IndexError,
-            '/': ('/', ''),
-            '//': ('/', ''),
-            '/\\': ('/', '\\'),
-            '/.//': ('/', ''),
-            'cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
-            '/cgi-bin/file1.py': ('/cgi-bin', 'file1.py'),
-            'a': ('/', 'a'),
-            '/a': ('/', 'a'),
-            '//a': ('/', 'a'),
-            './a': ('/', 'a'),
-            './C:/': ('/C:', ''),
-            '/a/b': ('/a', 'b'),
-            '/a/b/': ('/a/b', ''),
-            '/a/b/c/..': ('/a/b', ''),
-            '/a/b/c/../d': ('/a/b', 'd'),
-            '/a/b/c/../d/e/../f': ('/a/b/d', 'f'),
-            '/a/b/c/../d/e/../../f': ('/a/b', 'f'),
-            '/a/b/c/../d/e/.././././..//f': ('/a/b', 'f'),
+            '/': '//',
+            '//': '//',
+            '/\\': '//\\',
+            '/.//': '//',
+            'cgi-bin/file1.py': '/cgi-bin/file1.py',
+            '/cgi-bin/file1.py': '/cgi-bin/file1.py',
+            'a': '//a',
+            '/a': '//a',
+            '//a': '//a',
+            './a': '//a',
+            './C:/': '/C:/',
+            '/a/b': '/a/b',
+            '/a/b/': '/a/b/',
+            '/a/b/.': '/a/b/',
+            '/a/b/c/..': '/a/b/',
+            '/a/b/c/../d': '/a/b/d',
+            '/a/b/c/../d/e/../f': '/a/b/d/f',
+            '/a/b/c/../d/e/../../f': '/a/b/f',
+            '/a/b/c/../d/e/.././././..//f': '/a/b/f',
             '../a/b/c/../d/e/.././././..//f': IndexError,
-            '/a/b/c/../d/e/../../../f': ('/a', 'f'),
-            '/a/b/c/../d/e/../../../../f': ('/', 'f'),
+            '/a/b/c/../d/e/../../../f': '/a/f',
+            '/a/b/c/../d/e/../../../../f': '//f',
             '/a/b/c/../d/e/../../../../../f': IndexError,
-            '/a/b/c/../d/e/../../../../f/..': ('/', ''),
+            '/a/b/c/../d/e/../../../../f/..': '//',
+            '/a/b/c/../d/e/../../../../f/../.': '//',
         }
         for path, expected in test_vectors.iteritems():
             if isinstance(expected, type) and issubclass(expected, Exception):
                 self.assertRaises(expected,
-                                  CGIHTTPServer._url_collapse_path_split, path)
+                                  CGIHTTPServer._url_collapse_path, path)
             else:
-                actual = CGIHTTPServer._url_collapse_path_split(path)
+                actual = CGIHTTPServer._url_collapse_path(path)
                 self.assertEqual(expected, actual,
                                  msg='path = %r\nGot:    %r\nWanted: %r' %
                                  (path, actual, expected))

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list