[Python-checkins] r85115 - in python/branches/py3k: Include/code.h Objects/codeobject.c Objects/object.c Objects/unicodeobject.c Python/pythonrun.c

victor.stinner python-checkins at python.org
Wed Sep 29 18:35:47 CEST 2010


Author: victor.stinner
Date: Wed Sep 29 18:35:47 2010
New Revision: 85115

Log:
Issue #9630: Redecode filenames when setting the filesystem encoding

Redecode the filenames of:

 - all modules: __file__ and __path__ attributes
 - all code objects: co_filename attribute
 - sys.path
 - sys.meta_path
 - sys.executable
 - sys.path_importer_cache (keys)

Keep weak references to all code objects until initfsencoding() is called, to
be able to redecode co_filename attribute of all code objects.


Modified:
   python/branches/py3k/Include/code.h
   python/branches/py3k/Objects/codeobject.c
   python/branches/py3k/Objects/object.c
   python/branches/py3k/Objects/unicodeobject.c
   python/branches/py3k/Python/pythonrun.c

Modified: python/branches/py3k/Include/code.h
==============================================================================
--- python/branches/py3k/Include/code.h	(original)
+++ python/branches/py3k/Include/code.h	Wed Sep 29 18:35:47 2010
@@ -99,6 +99,13 @@
 PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
                                       PyObject *names, PyObject *lineno_obj);
 
+/* List of weak references to all code objects. The list is used by
+   initfsencoding() to redecode code filenames at startup if the filesystem
+   encoding changes. When initfsencoding() returns, the list is set to NULL
+   and is no longer used. */
+
+extern PyObject *_Py_code_object_list;
+
 #ifdef __cplusplus
 }
 #endif

Modified: python/branches/py3k/Objects/codeobject.c
==============================================================================
--- python/branches/py3k/Objects/codeobject.c	(original)
+++ python/branches/py3k/Objects/codeobject.c	Wed Sep 29 18:35:47 2010
@@ -5,6 +5,8 @@
 #define NAME_CHARS \
     "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
 
+PyObject *_Py_code_object_list = NULL;
+
 /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */
 
 static int
@@ -109,8 +111,23 @@
         co->co_lnotab = lnotab;
         co->co_zombieframe = NULL;
         co->co_weakreflist = NULL;
+
+        if (_Py_code_object_list != NULL) {
+            int err;
+            PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL);
+            if (ref == NULL)
+                goto error;
+            err = PyList_Append(_Py_code_object_list, ref);
+            Py_DECREF(ref);
+            if (err)
+                goto error;
+        }
     }
     return co;
+
+error:
+    Py_DECREF(co);
+    return NULL;
 }
 
 PyCodeObject *

Modified: python/branches/py3k/Objects/object.c
==============================================================================
--- python/branches/py3k/Objects/object.c	(original)
+++ python/branches/py3k/Objects/object.c	Wed Sep 29 18:35:47 2010
@@ -1604,6 +1604,10 @@
     if (PyType_Ready(&PyCode_Type) < 0)
         Py_FatalError("Can't initialize code type");
 
+    _Py_code_object_list = PyList_New(0);
+    if (_Py_code_object_list == NULL)
+        Py_FatalError("Can't initialize code type");
+
     if (PyType_Ready(&PyFrame_Type) < 0)
         Py_FatalError("Can't initialize frame type");
 

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Wed Sep 29 18:35:47 2010
@@ -1510,10 +1510,14 @@
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
-    } else
+    }
+    else {
+        /* if you change the default encoding, update also
+           PyUnicode_DecodeFSDefaultAndSize() and redecode_filenames() */
         return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     "surrogateescape");
+    }
 }
 
 PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
@@ -1680,6 +1684,8 @@
                                 "surrogateescape");
     }
     else {
+        /* if you change the default encoding, update also
+           PyUnicode_EncodeFSDefault() and redecode_filenames() */
         return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
     }
 }

Modified: python/branches/py3k/Python/pythonrun.c
==============================================================================
--- python/branches/py3k/Python/pythonrun.c	(original)
+++ python/branches/py3k/Python/pythonrun.c	Wed Sep 29 18:35:47 2010
@@ -719,6 +719,259 @@
     }
 }
 
+/* Re-decode 'file': encode it back to bytes with the default filesystem
+   encoding (utf-8), then decode those bytes with 'new_encoding' and the
+   'errors' handler. Return the new string, or NULL on error. */
+static PyObject*
+redecode_filename(PyObject *file, const char *new_encoding,
+                  const char *errors)
+{
+    PyObject *encoded, *decoded;
+
+    encoded = PyUnicode_EncodeFSDefault(file);
+    if (encoded == NULL)
+        return NULL;
+
+    decoded = PyUnicode_Decode(PyBytes_AsString(encoded),
+                               PyBytes_GET_SIZE(encoded),
+                               new_encoding, errors);
+    Py_DECREF(encoded);
+    return decoded;
+}
+
+/* Redecode in place every item of the 'paths' list.
+   Return 0 on success, -1 if an item cannot be read, redecoded or stored. */
+static int
+redecode_path_list(PyObject *paths,
+                   const char *new_encoding, const char *errors)
+{
+    PyObject *filename, *new_filename;
+    Py_ssize_t i, size;
+
+    size = PyList_Size(paths);
+    for (i=0; i < size; i++) {
+        filename = PyList_GetItem(paths, i);
+        if (filename == NULL)
+            return -1;
+        new_filename = redecode_filename(filename, new_encoding, errors);
+        if (new_filename == NULL)
+            return -1;
+        /* PyList_SetItem() steals the reference to new_filename, even when
+           it fails: do not decref it again on error */
+        if (PyList_SetItem(paths, i, new_filename))
+            return -1;
+    }
+    return 0;
+}
+
+/* Redecode the __file__ and __path__ attributes of every module in
+   sys.modules. Return 0 on success, -1 on error. */
+static int
+redecode_sys_modules(const char *new_encoding, const char *errors)
+{
+    PyObject *modules, *values, *file, *new_file, *paths;
+    PyObject *iter = NULL, *module = NULL;
+    int err;
+
+    modules = PyThreadState_GET()->interp->modules;
+
+    values = PyObject_CallMethod(modules, "values", "");
+    if (values == NULL)
+        goto error;
+
+    iter = PyObject_GetIter(values);
+    Py_DECREF(values);
+    if (iter == NULL)
+        goto error;
+
+    while (1) {
+        module = PyIter_Next(iter);
+        if (module == NULL) {
+            if (PyErr_Occurred())
+                goto error;
+            break;
+        }
+
+        file = PyModule_GetFilenameObject(module);
+        if (file != NULL) {
+            new_file = redecode_filename(file, new_encoding, errors);
+            Py_DECREF(file);
+            if (new_file == NULL)
+                goto error;
+            err = PyObject_SetAttrString(module, "__file__", new_file);
+            Py_DECREF(new_file);
+            if (err)
+                goto error;
+        }
+        else
+            PyErr_Clear();
+
+        paths = PyObject_GetAttrString(module, "__path__");
+        if (paths != NULL) {
+            /* new reference: release it whether or not redecoding fails */
+            err = redecode_path_list(paths, new_encoding, errors);
+            Py_DECREF(paths);
+            if (err)
+                goto error;
+        }
+        else
+            PyErr_Clear();
+
+        Py_CLEAR(module);
+    }
+    Py_CLEAR(iter);
+    return 0;
+
+error:
+    Py_XDECREF(iter);
+    Py_XDECREF(module);
+    return -1;
+}
+
+/* Redecode the keys of sys.path_importer_cache: build a new dict with the
+   redecoded keys and store it in sys. Return 0 on success, -1 on error. */
+static int
+redecode_sys_path_importer_cache(const char *new_encoding, const char *errors)
+{
+    PyObject *path_importer_cache, *items, *path, *importer, *new_path;
+    PyObject *new_cache = NULL, *iter = NULL, *item = NULL;
+    int err;
+
+    path_importer_cache = PySys_GetObject("path_importer_cache");
+    if (path_importer_cache == NULL)
+        goto error;
+
+    items = PyObject_CallMethod(path_importer_cache, "items", "");
+    if (items == NULL)
+        goto error;
+
+    iter = PyObject_GetIter(items);
+    Py_DECREF(items);
+    if (iter == NULL)
+        goto error;
+
+    new_cache = PyDict_New();
+    if (new_cache == NULL)
+        goto error;
+
+    while (1) {
+        item = PyIter_Next(iter);
+        if (item == NULL) {
+            if (PyErr_Occurred())
+                goto error;
+            break;
+        }
+        path = PyTuple_GET_ITEM(item, 0);
+        importer = PyTuple_GET_ITEM(item, 1);
+        new_path = redecode_filename(path, new_encoding, errors);
+        if (new_path == NULL)
+            goto error;
+        err = PyDict_SetItem(new_cache, new_path, importer);
+        Py_DECREF(new_path);
+        if (err)
+            goto error;
+        Py_CLEAR(item);  /* PyIter_Next() returned a new reference */
+    }
+    Py_CLEAR(iter);
+    if (PySys_SetObject("path_importer_cache", new_cache))
+        goto error;
+    Py_CLEAR(new_cache);
+    return 0;
+
+error:
+    Py_XDECREF(iter);
+    Py_XDECREF(item);
+    Py_XDECREF(new_cache);
+    return -1;
+}
+
+/* Redecode the co_filename attribute of every code object that is still
+   referenced by _Py_code_object_list, then drop that list */
+static int
+redecode_code_objects(const char *new_encoding, const char *errors)
+{
+    Py_ssize_t pos, count;
+    PyCodeObject *code;
+    PyObject *weakref, *decoded;
+
+    count = Py_SIZE(_Py_code_object_list);
+    for (pos = 0; pos < count; pos++) {
+        weakref = PyList_GET_ITEM(_Py_code_object_list, pos);
+        code = (PyCodeObject *)PyWeakref_GetObject(weakref);
+        if ((PyObject *)code == Py_None)
+            continue;  /* the weak reference is dead: nothing to update */
+        if (code == NULL)
+            return -1;
+        decoded = redecode_filename(code->co_filename, new_encoding, errors);
+        if (decoded == NULL)
+            return -1;
+        Py_DECREF(code->co_filename);
+        code->co_filename = decoded;
+    }
+    Py_CLEAR(_Py_code_object_list);
+    return 0;
+}
+
+/* Called when the filesystem encoding changes from the default encoding
+   (utf-8) to new_encoding: redecode the filenames stored in the modules
+   (__file__ and __path__), sys.path, sys.meta_path, sys.executable, the
+   keys of sys.path_importer_cache and the code objects (co_filename) */
+static int
+redecode_filenames(const char *new_encoding)
+{
+    const char *errors;
+    PyObject *path_list, *old_exe, *new_exe;
+    int status;
+
+    /* Nothing to do for utf-8: PyUnicode_DecodeFSDefault() and
+       PyUnicode_EncodeFSDefault() already use it while
+       Py_FileSystemDefaultEncoding is NULL */
+    if (strcmp(new_encoding, "utf-8") == 0)
+        return 0;
+
+    /* mbcs is decoded with errors=NULL, every other encoding with the
+       surrogateescape error handler */
+    errors = (strcmp(new_encoding, "mbcs") == 0) ? NULL : "surrogateescape";
+
+    /* sys.modules */
+    if (redecode_sys_modules(new_encoding, errors) != 0)
+        return -1;
+
+    /* sys.path, when it exists */
+    path_list = PySys_GetObject("path");
+    if (path_list != NULL
+        && redecode_path_list(path_list, new_encoding, errors) != 0)
+        return -1;
+
+    /* sys.meta_path, when it exists */
+    path_list = PySys_GetObject("meta_path");
+    if (path_list != NULL
+        && redecode_path_list(path_list, new_encoding, errors) != 0)
+        return -1;
+
+    /* sys.executable is required */
+    old_exe = PySys_GetObject("executable");
+    if (old_exe == NULL)
+        return -1;
+    new_exe = redecode_filename(old_exe, new_encoding, errors);
+    if (new_exe == NULL)
+        return -1;
+    status = PySys_SetObject("executable", new_exe);
+    Py_DECREF(new_exe);
+    if (status != 0)
+        return -1;
+
+    /* sys.path_importer_cache */
+    if (redecode_sys_path_importer_cache(new_encoding, errors) != 0)
+        return -1;
+
+    /* code objects */
+    if (redecode_code_objects(new_encoding, errors) != 0)
+        return -1;
+
+    return 0;
+}
+
 static void
 initfsencoding(void)
 {
@@ -744,8 +997,11 @@
             codeset = get_codeset();
         }
         if (codeset != NULL) {
+            if (redecode_filenames(codeset))
+                Py_FatalError("Py_Initialize: can't redecode filenames");
             Py_FileSystemDefaultEncoding = codeset;
             Py_HasFileSystemDefaultEncoding = 0;
+            Py_CLEAR(_Py_code_object_list);
             return;
         } else {
             fprintf(stderr, "Unable to get the locale encoding:\n");
@@ -758,6 +1014,8 @@
     }
 #endif
 
+    Py_CLEAR(_Py_code_object_list);
+
     /* the encoding is mbcs, utf-8 or ascii */
     codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
     if (!codec) {


More information about the Python-checkins mailing list