[Python-checkins] cpython (3.2): Issue #10914: Initialize correctly the filesystem codec when creating a new

victor.stinner python-checkins at python.org
Sat Apr 30 14:17:48 CEST 2011


http://hg.python.org/cpython/rev/2caf82aee7a4
changeset:   69708:2caf82aee7a4
branch:      3.2
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Wed Apr 27 00:24:21 2011 +0200
summary:
  Issue #10914: Correctly initialize the filesystem codec when creating a new
subinterpreter, to fix a bootstrap issue with codecs implemented in Python, such
as the ISO-8859-15 codec.

Add fscodec_initialized attribute to the PyInterpreterState structure.

files:
  Include/pystate.h       |   1 +
  Misc/NEWS               |   4 ++++
  Objects/unicodeobject.c |  29 ++++++++++++++++++++++-------
  Python/pystate.c        |   1 +
  Python/pythonrun.c      |  23 +++++++++++++++--------
  5 files changed, 43 insertions(+), 15 deletions(-)


diff --git a/Include/pystate.h b/Include/pystate.h
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -31,6 +31,7 @@
     PyObject *codec_search_cache;
     PyObject *codec_error_registry;
     int codecs_initialized;
+    int fscodec_initialized;
 
 #ifdef HAVE_DLOPEN
     int dlopenflags;
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #10914: Correctly initialize the filesystem codec when creating a new
+  subinterpreter, to fix a bootstrap issue with codecs implemented in Python,
+  such as the ISO-8859-15 codec.
+
 - Issue #10517: After fork(), reinitialize the TLS used by the PyGILState_*
   APIs, to avoid a crash with the pthread implementation in RHEL 5.  Patch
   by Charles-François Natali.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1626,7 +1626,17 @@
                                 PyUnicode_GET_SIZE(unicode),
                                 "surrogateescape");
 #else
-    if (Py_FileSystemDefaultEncoding) {
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    /* Bootstrap check: if the filesystem codec is implemented in Python, we
+       cannot use it to encode and decode filenames before it is loaded, and
+       loading the Python codec requires encoding at least its own filename.
+       Use the C version of the locale codec until the codec registry is
+       initialized and the Python codec is loaded.
+
+       Py_FileSystemDefaultEncoding is shared between all interpreters, so we
+       cannot rely on it alone: also check interp->fscodec_initialized for
+       subinterpreters. */
+    if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
@@ -1818,12 +1828,17 @@
 #elif defined(__APPLE__)
     return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
 #else
-    /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
-       can be undefined. If it is case, decode using UTF-8. The following assumes
-       that Py_FileSystemDefaultEncoding is set to a built-in encoding during the
-       bootstrapping process where the codecs aren't ready yet.
-    */
-    if (Py_FileSystemDefaultEncoding) {
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    /* Bootstrap check: if the filesystem codec is implemented in Python, we
+       cannot use it to encode and decode filenames before it is loaded, and
+       loading the Python codec requires encoding at least its own filename.
+       Use the C version of the locale codec until the codec registry is
+       initialized and the Python codec is loaded.
+
+       Py_FileSystemDefaultEncoding is shared between all interpreters, so we
+       cannot rely on it alone: also check interp->fscodec_initialized for
+       subinterpreters. */
+    if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
         return PyUnicode_Decode(s, size,
                                 Py_FileSystemDefaultEncoding,
                                 "surrogateescape");
diff --git a/Python/pystate.c b/Python/pystate.c
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -79,6 +79,7 @@
         interp->codec_search_cache = NULL;
         interp->codec_error_registry = NULL;
         interp->codecs_initialized = 0;
+        interp->fscodec_initialized = 0;
 #ifdef HAVE_DLOPEN
 #ifdef RTLD_NOW
         interp->dlopenflags = RTLD_NOW;
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -53,7 +53,7 @@
 
 /* Forward */
 static void initmain(void);
-static void initfsencoding(void);
+static int initfsencoding(PyInterpreterState *interp);
 static void initsite(void);
 static int initstdio(void);
 static void flush_io(void);
@@ -291,7 +291,8 @@
 
     _PyTime_Init();
 
-    initfsencoding();
+    if (initfsencoding(interp) < 0)
+        Py_FatalError("Py_Initialize: unable to load the file system codec");
 
     if (install_sigs)
         initsigs(); /* Signal handling stuff, including initintr() */
@@ -608,6 +609,10 @@
         Py_DECREF(pstderr);
 
         _PyImportHooks_Init();
+
+        if (initfsencoding(interp) < 0)
+            goto handle_error;
+
         if (initstdio() < 0)
             Py_FatalError(
             "Py_Initialize: can't initialize sys standard streams");
@@ -720,8 +725,8 @@
     }
 }
 
-static void
-initfsencoding(void)
+static int
+initfsencoding(PyInterpreterState *interp)
 {
     PyObject *codec;
 #if defined(HAVE_LANGINFO_H) && defined(CODESET)
@@ -738,7 +743,8 @@
 
         Py_FileSystemDefaultEncoding = codeset;
         Py_HasFileSystemDefaultEncoding = 0;
-        return;
+        interp->fscodec_initialized = 1;
+        return 0;
     }
 #endif
 
@@ -748,10 +754,11 @@
         /* Such error can only occurs in critical situations: no more
          * memory, import a module of the standard library failed,
          * etc. */
-        Py_FatalError("Py_Initialize: unable to load the file system codec");
-    } else {
-        Py_DECREF(codec);
+        return -1;
     }
+    Py_DECREF(codec);
+    interp->fscodec_initialized = 1;
+    return 0;
 }
 
 /* Import the site module (not into __main__ though) */

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list