[Python-checkins] r84182 - in python/branches/py3k: Doc/using/cmdline.rst Doc/whatsnew/3.2.rst Lib/test/test_pep277.py Lib/test/test_sys.py Misc/NEWS Modules/main.c Python/pythonrun.c

victor.stinner python-checkins at python.org
Wed Aug 18 23:23:25 CEST 2010


Author: victor.stinner
Date: Wed Aug 18 23:23:25 2010
New Revision: 84182

Log:
Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding.

initfsencoding() also displays a better error message if get_codeset() fails.


Modified:
   python/branches/py3k/Doc/using/cmdline.rst
   python/branches/py3k/Doc/whatsnew/3.2.rst
   python/branches/py3k/Lib/test/test_pep277.py
   python/branches/py3k/Lib/test/test_sys.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Modules/main.c
   python/branches/py3k/Python/pythonrun.c

Modified: python/branches/py3k/Doc/using/cmdline.rst
==============================================================================
--- python/branches/py3k/Doc/using/cmdline.rst	(original)
+++ python/branches/py3k/Doc/using/cmdline.rst	Wed Aug 18 23:23:25 2010
@@ -442,11 +442,20 @@
    import of source modules.
 
 
+.. envvar:: PYTHONFSENCODING
+
+   If this is set before running the interpreter, it overrides the encoding used
+   for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
+
+   .. versionadded:: 3.2
+
+
 .. envvar:: PYTHONIOENCODING
 
-   Overrides the encoding used for stdin/stdout/stderr, in the syntax
-   ``encodingname:errorhandler``.  The ``:errorhandler`` part is optional and
-   has the same meaning as in :func:`str.encode`.
+   If this is set before running the interpreter, it overrides the encoding used
+   for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
+   ``:errorhandler`` part is optional and has the same meaning as in
+   :func:`str.encode`.
 
    For stderr, the ``:errorhandler`` part is ignored; the handler will always be
    ``'backslashreplace'``.

Modified: python/branches/py3k/Doc/whatsnew/3.2.rst
==============================================================================
--- python/branches/py3k/Doc/whatsnew/3.2.rst	(original)
+++ python/branches/py3k/Doc/whatsnew/3.2.rst	Wed Aug 18 23:23:25 2010
@@ -232,6 +232,15 @@
 
 * Stub
 
+
+Unicode
+=======
+
+The filesystem encoding can be specified by setting the
+:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
+The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
+
+
 IDLE
 ====
 

Modified: python/branches/py3k/Lib/test/test_pep277.py
==============================================================================
--- python/branches/py3k/Lib/test/test_pep277.py	(original)
+++ python/branches/py3k/Lib/test/test_pep277.py	Wed Aug 18 23:23:25 2010
@@ -43,7 +43,7 @@
 
 # Is it Unicode-friendly?
 if not os.path.supports_unicode_filenames:
-    fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+    fsencoding = sys.getfilesystemencoding()
     try:
         for name in filenames:
             name.encode(fsencoding)

Modified: python/branches/py3k/Lib/test/test_sys.py
==============================================================================
--- python/branches/py3k/Lib/test/test_sys.py	(original)
+++ python/branches/py3k/Lib/test/test_sys.py	Wed Aug 18 23:23:25 2010
@@ -863,16 +863,24 @@
     def test_getfilesystemencoding(self):
         import codecs
 
-        def check_fsencoding(fs_encoding):
+        def check_fsencoding(fs_encoding, expected=None):
             self.assertIsNotNone(fs_encoding)
             if sys.platform == 'darwin':
                 self.assertEqual(fs_encoding, 'utf-8')
             codecs.lookup(fs_encoding)
+            if expected:
+                self.assertEqual(fs_encoding, expected)
 
         fs_encoding = sys.getfilesystemencoding()
         check_fsencoding(fs_encoding)
 
-        # Even in C locale
+        def get_fsencoding(env):
+            output = subprocess.check_output(
+                [sys.executable, "-c",
+                 "import sys; print(sys.getfilesystemencoding())"],
+                env=env)
+            return output.rstrip().decode('ascii')
+
         try:
             sys.executable.encode('ascii')
         except UnicodeEncodeError:
@@ -880,14 +888,22 @@
             # see issue #8611
             pass
         else:
+            # Even in C locale
             env = os.environ.copy()
             env['LANG'] = 'C'
-            output = subprocess.check_output(
-                [sys.executable, "-c",
-                 "import sys; print(sys.getfilesystemencoding())"],
-                env=env)
-            fs_encoding = output.rstrip().decode('ascii')
-            check_fsencoding(fs_encoding)
+            try:
+                del env['PYTHONFSENCODING']
+            except KeyError:
+                pass
+            check_fsencoding(get_fsencoding(env), 'ascii')
+
+            # Filesystem encoding is hardcoded on Windows and Mac OS X
+            if sys.platform not in ('win32', 'darwin'):
+                for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
+                    env = os.environ.copy()
+                    env['PYTHONFSENCODING'] = encoding
+                    check_fsencoding(get_fsencoding(env), encoding)
+
 
     def test_setfilesystemencoding(self):
         old = sys.getfilesystemencoding()

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Aug 18 23:23:25 2010
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #8622: Add PYTHONFSENCODING environment variable to override the
+  filesystem encoding.
+
 - Issue #5127: The C functions that access the Unicode Database now accept and
   return characters from the full Unicode range, even on narrow unicode builds
   (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others).  A visible difference

Modified: python/branches/py3k/Modules/main.c
==============================================================================
--- python/branches/py3k/Modules/main.c	(original)
+++ python/branches/py3k/Modules/main.c	Wed Aug 18 23:23:25 2010
@@ -99,6 +99,7 @@
                The default module search path uses %s.\n\
 PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
 PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
+PYTHONFSENCODING: Encoding used for the filesystem.\n\
 ";
 
 FILE *

Modified: python/branches/py3k/Python/pythonrun.c
==============================================================================
--- python/branches/py3k/Python/pythonrun.c	(original)
+++ python/branches/py3k/Python/pythonrun.c	Wed Aug 18 23:23:25 2010
@@ -134,18 +134,13 @@
     return flag;
 }
 
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
 static char*
-get_codeset(void)
+get_codec_name(const char *encoding)
 {
-    char* codeset, *name_str;
+    char *name_utf8, *name_str;
     PyObject *codec, *name = NULL;
 
-    codeset = nl_langinfo(CODESET);
-    if (!codeset || codeset[0] == '\0')
-        return NULL;
-
-    codec = _PyCodec_Lookup(codeset);
+    codec = _PyCodec_Lookup(encoding);
     if (!codec)
         goto error;
 
@@ -154,18 +149,34 @@
     if (!name)
         goto error;
 
-    name_str = _PyUnicode_AsString(name);
+    name_utf8 = _PyUnicode_AsString(name);
     if (name == NULL)
         goto error;
-    codeset = strdup(name_str);
+    name_str = strdup(name_utf8);
     Py_DECREF(name);
-    return codeset;
+    if (name_str == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    return name_str;
 
 error:
     Py_XDECREF(codec);
     Py_XDECREF(name);
     return NULL;
 }
+
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+static char*
+get_codeset(void)
+{
+    char* codeset = nl_langinfo(CODESET);
+    if (!codeset || codeset[0] == '\0') {
+        PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
+        return NULL;
+    }
+    return get_codec_name(codeset);
+}
 #endif
 
 void
@@ -706,25 +717,35 @@
 {
     PyObject *codec;
 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char *codeset;
+    char *codeset = NULL;
 
     if (Py_FileSystemDefaultEncoding == NULL) {
-        /* On Unix, set the file system encoding according to the
-           user's preference, if the CODESET names a well-known
-           Python codec, and Py_FileSystemDefaultEncoding isn't
-           initialized by other means. Also set the encoding of
-           stdin and stdout if these are terminals.  */
-        codeset = get_codeset();
+        const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
+        if (env_encoding != NULL) {
+            codeset = get_codec_name(env_encoding);
+            if (!codeset) {
+                fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
+                PyErr_Print();
+            }
+        }
+        if (!codeset) {
+            /* On Unix, set the file system encoding according to the
+               user's preference, if the CODESET names a well-known
+               Python codec, and Py_FileSystemDefaultEncoding isn't
+               initialized by other means. Also set the encoding of
+               stdin and stdout if these are terminals.  */
+            codeset = get_codeset();
+        }
         if (codeset != NULL) {
             Py_FileSystemDefaultEncoding = codeset;
             Py_HasFileSystemDefaultEncoding = 0;
             return;
+        } else {
+            fprintf(stderr, "Unable to get the locale encoding:\n");
+            PyErr_Print();
         }
 
-        PyErr_Clear();
-        fprintf(stderr,
-                "Unable to get the locale encoding: "
-                "fallback to utf-8\n");
+        fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
         Py_FileSystemDefaultEncoding = "utf-8";
         Py_HasFileSystemDefaultEncoding = 1;
     }


More information about the Python-checkins mailing list