[Python-checkins] cpython (2.7): Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment

serhiy.storchaka python-checkins at python.org
Wed Feb 19 22:28:04 CET 2014


http://hg.python.org/cpython/rev/d11ca14c9a61
changeset:   89285:d11ca14c9a61
branch:      2.7
parent:      89274:292cac3afca6
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Feb 19 23:27:37 2014 +0200
summary:
  Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
variables names and values.

files:
  Lib/ntpath.py                |  25 ++++++++----
  Lib/posixpath.py             |  27 ++++++++++---
  Lib/test/test_genericpath.py |  29 ++++++++++++++-
  Lib/test/test_ntpath.py      |  36 ++++++++++++++++--
  Lib/test/test_support.py     |  46 ++++++++++++++++++++++++
  Misc/NEWS                    |   3 +
  6 files changed, 145 insertions(+), 21 deletions(-)


diff --git a/Lib/ntpath.py b/Lib/ntpath.py
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -294,6 +294,13 @@
         return path
     import string
     varchars = string.ascii_letters + string.digits + '_-'
+    if isinstance(path, unicode):
+        encoding = sys.getfilesystemencoding()
+        def getenv(var):
+            return os.environ[var.encode(encoding)].decode(encoding)
+    else:
+        def getenv(var):
+            return os.environ[var]
     res = ''
     index = 0
     pathlen = len(path)
@@ -322,9 +329,9 @@
                     index = pathlen - 1
                 else:
                     var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
+                    try:
+                        res = res + getenv(var)
+                    except KeyError:
                         res = res + '%' + var + '%'
         elif c == '$':  # variable or '$$'
             if path[index + 1:index + 2] == '$':
@@ -336,9 +343,9 @@
                 try:
                     index = path.index('}')
                     var = path[:index]
-                    if var in os.environ:
-                        res = res + os.environ[var]
-                    else:
+                    try:
+                        res = res + getenv(var)
+                    except KeyError:
                         res = res + '${' + var + '}'
                 except ValueError:
                     res = res + '${' + path
@@ -351,9 +358,9 @@
                     var = var + c
                     index = index + 1
                     c = path[index:index + 1]
-                if var in os.environ:
-                    res = res + os.environ[var]
-                else:
+                try:
+                    res = res + getenv(var)
+                except KeyError:
                     res = res + '$' + var
                 if c != '':
                     index = index - 1
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -285,28 +285,43 @@
 # Non-existent variables are left unchanged.
 
 _varprog = None
+_uvarprog = None
 
 def expandvars(path):
     """Expand shell variables of form $var and ${var}.  Unknown variables
     are left unchanged."""
-    global _varprog
+    global _varprog, _uvarprog
     if '$' not in path:
         return path
-    if not _varprog:
-        import re
-        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+    if isinstance(path, _unicode):
+        if not _varprog:
+            import re
+            _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
+        varprog = _varprog
+        encoding = sys.getfilesystemencoding()
+    else:
+        if not _uvarprog:
+            import re
+            _uvarprog = re.compile(_unicode(r'\$(\w+|\{[^}]*\})'), re.UNICODE)
+        varprog = _uvarprog
+        encoding = None
     i = 0
     while True:
-        m = _varprog.search(path, i)
+        m = varprog.search(path, i)
         if not m:
             break
         i, j = m.span(0)
         name = m.group(1)
         if name.startswith('{') and name.endswith('}'):
             name = name[1:-1]
+        if encoding:
+            name = name.encode(encoding)
         if name in os.environ:
             tail = path[j:]
-            path = path[:i] + os.environ[name]
+            value = os.environ[name]
+            if encoding:
+                value = value.decode(encoding)
+            path = path[:i] + value
             i = len(path)
             path += tail
         else:
diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py
--- a/Lib/test/test_genericpath.py
+++ b/Lib/test/test_genericpath.py
@@ -199,13 +199,40 @@
             self.assertEqual(expandvars("$[foo]bar"), "$[foo]bar")
             self.assertEqual(expandvars("$bar bar"), "$bar bar")
             self.assertEqual(expandvars("$?bar"), "$?bar")
-            self.assertEqual(expandvars("${foo}bar"), "barbar")
             self.assertEqual(expandvars("$foo}bar"), "bar}bar")
             self.assertEqual(expandvars("${foo"), "${foo")
             self.assertEqual(expandvars("${{foo}}"), "baz1}")
             self.assertEqual(expandvars("$foo$foo"), "barbar")
             self.assertEqual(expandvars("$bar$bar"), "$bar$bar")
 
+    @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+    def test_expandvars_nonascii(self):
+        if self.pathmodule.__name__ == 'macpath':
+            self.skipTest('macpath.expandvars is a stub')
+        expandvars = self.pathmodule.expandvars
+        def check(value, expected):
+            self.assertEqual(expandvars(value), expected)
+        encoding = sys.getfilesystemencoding()
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            unonascii = test_support.FS_NONASCII
+            snonascii = unonascii.encode(encoding)
+            env['spam'] = snonascii
+            env[snonascii] = 'ham' + snonascii
+            check(snonascii, snonascii)
+            check('$spam bar', '%s bar' % snonascii)
+            check('${spam}bar', '%sbar' % snonascii)
+            check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+            check('$bar%s bar' % snonascii, '$bar%s bar' % snonascii)
+            check('$spam}bar', '%s}bar' % snonascii)
+
+            check(unonascii, unonascii)
+            check(u'$spam bar', u'%s bar' % unonascii)
+            check(u'${spam}bar', u'%sbar' % unonascii)
+            check(u'${%s}bar' % unonascii, u'ham%sbar' % unonascii)
+            check(u'$bar%s bar' % unonascii, u'$bar%s bar' % unonascii)
+            check(u'$spam}bar', u'%s}bar' % unonascii)
+
     def test_abspath(self):
         self.assertIn("foo", self.pathmodule.abspath("foo"))
 
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -1,16 +1,19 @@
 import ntpath
 import os
+import sys
 from test.test_support import TestFailed
 from test import test_support, test_genericpath
 import unittest
 
+def tester0(fn, wantResult):
+    gotResult = eval(fn)
+    if wantResult != gotResult:
+        raise TestFailed, "%s should return: %r but returned: %r" \
+              %(fn, wantResult, gotResult)
 
 def tester(fn, wantResult):
     fn = fn.replace("\\", "\\\\")
-    gotResult = eval(fn)
-    if wantResult != gotResult:
-        raise TestFailed, "%s should return: %s but returned: %s" \
-              %(str(fn), str(wantResult), str(gotResult))
+    tester0(fn, wantResult)
 
 
 class TestNtpath(unittest.TestCase):
@@ -173,7 +176,6 @@
             tester('ntpath.expandvars("$[foo]bar")', "$[foo]bar")
             tester('ntpath.expandvars("$bar bar")', "$bar bar")
             tester('ntpath.expandvars("$?bar")', "$?bar")
-            tester('ntpath.expandvars("${foo}bar")', "barbar")
             tester('ntpath.expandvars("$foo}bar")', "bar}bar")
             tester('ntpath.expandvars("${foo")', "${foo")
             tester('ntpath.expandvars("${{foo}}")', "baz1}")
@@ -187,6 +189,30 @@
             tester('ntpath.expandvars("%foo%%bar")', "bar%bar")
             tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
 
+    @unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
+    def test_expandvars_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        def check(value, expected):
+            tester0("ntpath.expandvars(%r)" % value, expected)
+            tester0("ntpath.expandvars(%r)" % value.decode(encoding),
+                    expected.decode(encoding))
+        with test_support.EnvironmentVarGuard() as env:
+            env.clear()
+            unonascii = test_support.FS_NONASCII
+            snonascii = unonascii.encode(encoding)
+            env['spam'] = snonascii
+            env[snonascii] = 'ham' + snonascii
+            check('$spam bar', '%s bar' % snonascii)
+            check('$%s bar' % snonascii, '$%s bar' % snonascii)
+            check('${spam}bar', '%sbar' % snonascii)
+            check('${%s}bar' % snonascii, 'ham%sbar' % snonascii)
+            check('$spam}bar', '%s}bar' % snonascii)
+            check('$%s}bar' % snonascii, '$%s}bar' % snonascii)
+            check('%spam% bar', '%s bar' % snonascii)
+            check('%{}% bar'.format(snonascii), 'ham%s bar' % snonascii)
+            check('%spam%bar', '%sbar' % snonascii)
+            check('%{}%bar'.format(snonascii), 'ham%sbar' % snonascii)
+
     def test_abspath(self):
         # ntpath.abspath() can only be used on a system with the "nt" module
         # (reasonably), so we protect this test with "import nt".  This allows
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -465,6 +465,52 @@
 
 is_jython = sys.platform.startswith('java')
 
+# FS_NONASCII: non-ASCII Unicode character encodable by
+# sys.getfilesystemencoding(), or None if there is no such character.
+FS_NONASCII = None
+if have_unicode:
+    for character in (
+        # First try printable and common characters to have a readable filename.
+        # For each character, the encoding list are just example of encodings able
+        # to encode the character (the list is not exhaustive).
+
+        # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
+        unichr(0x00E6),
+        # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
+        unichr(0x0130),
+        # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
+        unichr(0x0141),
+        # U+03C6 (Greek Small Letter Phi): cp1253
+        unichr(0x03C6),
+        # U+041A (Cyrillic Capital Letter Ka): cp1251
+        unichr(0x041A),
+        # U+05D0 (Hebrew Letter Alef): Encodable to cp424
+        unichr(0x05D0),
+        # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
+        unichr(0x060C),
+        # U+062A (Arabic Letter Teh): cp720
+        unichr(0x062A),
+        # U+0E01 (Thai Character Ko Kai): cp874
+        unichr(0x0E01),
+
+        # Then try more "special" characters. "special" because they may be
+        # interpreted or displayed differently depending on the exact locale
+        # encoding and the font.
+
+        # U+00A0 (No-Break Space)
+        unichr(0x00A0),
+        # U+20AC (Euro Sign)
+        unichr(0x20AC),
+    ):
+        try:
+            character.encode(sys.getfilesystemencoding())\
+                     .decode(sys.getfilesystemencoding())
+        except UnicodeError:
+            pass
+        else:
+            FS_NONASCII = character
+            break
+
 # Filename used for testing
 if os.name == 'java':
     # Jython disallows @ in module names
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -39,6 +39,9 @@
 Library
 -------
 
+- Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
+  variables names and values.
+
 - Issue #20635: Fixed grid_columnconfigure() and grid_rowconfigure() methods of
   Tkinter widgets to work in wantobjects=True mode.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list