[Python-checkins] cpython: Issue #3080: get_sourcefile(), make_source_pathname(), load_package()

victor.stinner python-checkins at python.org
Sun Mar 20 04:13:44 CET 2011


http://hg.python.org/cpython/rev/f6507eb8e689
changeset:   68721:f6507eb8e689
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Mon Mar 14 13:33:46 2011 -0400
summary:
  Issue #3080: get_sourcefile(), make_source_pathname(), load_package()

Use Unicode for module name and path in get_sourcefile(),
make_source_pathname() and load_package() functions.

files:
  Python/import.c

diff --git a/Python/import.c b/Python/import.c
--- a/Python/import.c
+++ b/Python/import.c
@@ -113,6 +113,8 @@
 #define MAGIC (3180 | ((long)'\r'<<16) | ((long)'\n'<<24))
 #define TAG "cpython-32"
 #define CACHEDIR "__pycache__"
+static const Py_UNICODE CACHEDIR_UNICODE[] = {
+    '_', '_', 'p', 'y', 'c', 'a', 'c', 'h', 'e', '_', '_', '\0'};
 /* Current magic word and string tag as globals. */
 static long pyc_magic = MAGIC;
 static const char *pyc_tag = TAG;
@@ -741,8 +743,8 @@
                       "sys.modules failed");
 }
 
-static PyObject * get_sourcefile(char *file);
-static char *make_source_pathname(char *pathname, char *buf);
+static PyObject * get_sourcefile(PyObject *filename);
+static PyObject *make_source_pathname(PyObject *pathname);
 static char *make_compiled_pathname(char *pathname, char *buf, size_t buflen,
                                     int debug);
 
@@ -807,7 +809,6 @@
 {
     PyObject *modules = PyImport_GetModuleDict();
     PyObject *m, *d, *v;
-    PyObject *pathbytes;
 
     m = PyImport_AddModuleObject(name);
     if (m == NULL)
@@ -822,12 +823,7 @@
     }
     /* Remember the filename as the __file__ attribute */
     if (pathname != NULL) {
-        pathbytes = PyUnicode_EncodeFSDefault(pathname);
-        if (pathbytes != NULL) {
-            v = get_sourcefile(PyBytes_AS_STRING(pathbytes));
-            Py_DECREF(pathbytes);
-        } else
-            v = NULL;
+        v = get_sourcefile(pathname);
         if (v == NULL)
             PyErr_Clear();
     }
@@ -892,6 +888,27 @@
 }
 
 
+/* Like strrchr(string, '/') but searches for the rightmost of either SEP
+   or ALTSEP, if the latter is defined.
+*/
+static Py_UNICODE*
+rightmost_sep_unicode(Py_UNICODE *s)
+{
+    Py_UNICODE *found, c;
+    for (found = NULL; (c = *s); s++) {
+        if (c == SEP
+#ifdef ALTSEP
+            || c == ALTSEP
+#endif
+            )
+        {
+            found = s;
+        }
+    }
+    return found;
+}
+
+
 /* Given a pathname for a Python source file, fill a buffer with the
    pathname for the corresponding compiled file.  Return the pathname
    for the compiled file, or NULL if there's no space in the buffer.
@@ -1005,42 +1022,50 @@
    source file, if the path matches the PEP 3147 format.  This does not check
    for any file existence, however, if the pyc file name does not match PEP
    3147 style, NULL is returned.  buf must be at least as big as pathname;
-   the resulting path will always be shorter. */
-
-static char *
-make_source_pathname(char *pathname, char *buf)
+   the resulting path will always be shorter.
+
+   (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
+
+static PyObject*
+make_source_pathname(PyObject *pathobj)
 {
-    /* __pycache__/foo.<tag>.pyc -> foo.py */
+    Py_UNICODE buf[MAXPATHLEN];
+    Py_UNICODE *pathname;
+    Py_UNICODE *left, *right, *dot0, *dot1, sep;
     size_t i, j;
-    char *left, *right, *dot0, *dot1, sep;
+
+    if (PyUnicode_GET_SIZE(pathobj) > MAXPATHLEN)
+        return NULL;
+    pathname = PyUnicode_AS_UNICODE(pathobj);
 
     /* Look back two slashes from the end.  In between these two slashes
        must be the string __pycache__ or this is not a PEP 3147 style
        path.  It's possible for there to be only one slash.
     */
-    if ((right = rightmost_sep(pathname)) == NULL)
+    right = rightmost_sep_unicode(pathname);
+    if (right == NULL)
         return NULL;
     sep = *right;
     *right = '\0';
-    left = rightmost_sep(pathname);
+    left = rightmost_sep_unicode(pathname);
     *right = sep;
     if (left == NULL)
         left = pathname;
     else
         left++;
-    if (right-left != strlen(CACHEDIR) ||
-        strncmp(left, CACHEDIR, right-left) != 0)
+    if (right-left != Py_UNICODE_strlen(CACHEDIR_UNICODE) ||
+        Py_UNICODE_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
         return NULL;
 
     /* Now verify that the path component to the right of the last slash
        has two dots in it.
     */
-    if ((dot0 = strchr(right + 1, '.')) == NULL)
+    if ((dot0 = Py_UNICODE_strchr(right + 1, '.')) == NULL)
         return NULL;
-    if ((dot1 = strchr(dot0 + 1, '.')) == NULL)
+    if ((dot1 = Py_UNICODE_strchr(dot0 + 1, '.')) == NULL)
         return NULL;
     /* Too many dots? */
-    if (strchr(dot1 + 1, '.') != NULL)
+    if (Py_UNICODE_strchr(dot1 + 1, '.') != NULL)
         return NULL;
 
     /* This is a PEP 3147 path.  Start by copying everything from the
@@ -1048,10 +1073,11 @@
        copy the file's basename, removing the magic tag and adding a .py
        suffix.
     */
-    strncpy(buf, pathname, (i=left-pathname));
-    strncpy(buf+i, right+1, (j=dot0-right));
-    strcpy(buf+i+j, "py");
-    return buf;
+    Py_UNICODE_strncpy(buf, pathname, (i=left-pathname));
+    Py_UNICODE_strncpy(buf+i, right+1, (j=dot0-right));
+    buf[i+j] = 'p';
+    buf[i+j+1] = 'y';
+    return PyUnicode_FromUnicode(buf, i+j+2);
 }
 
 /* Given a pathname for a Python source file, its time of last
@@ -1390,40 +1416,47 @@
  * Returns the path to the py file if available, else the given path
  */
 static PyObject *
-get_sourcefile(char *file)
+get_sourcefile(PyObject *filename)
 {
-    char py[MAXPATHLEN + 1];
     Py_ssize_t len;
-    PyObject *u;
+    Py_UNICODE *fileuni;
+    PyObject *py;
     struct stat statbuf;
 
-    if (!file || !*file) {
+    len = PyUnicode_GET_SIZE(filename);
+    if (len == 0)
         Py_RETURN_NONE;
-    }
-
-    len = strlen(file);
-    /* match '*.py?' */
-    if (len > MAXPATHLEN || PyOS_strnicmp(&file[len-4], ".py", 3) != 0) {
-        return PyUnicode_DecodeFSDefault(file);
-    }
+
+    /* match '*.py?' (e.g. *.pyc, *.pyo); any other name is returned unchanged */
+    fileuni = PyUnicode_AS_UNICODE(filename);
+    if (len < 5
+        || fileuni[len-4] != '.'
+        || (fileuni[len-3] != 'p' && fileuni[len-3] != 'P')
+        || (fileuni[len-2] != 'y' && fileuni[len-2] != 'Y'))
+        goto unchanged;
 
     /* Start by trying to turn PEP 3147 path into source path.  If that
      * fails, just chop off the trailing character, i.e. legacy pyc path
      * to py.
      */
-    if (make_source_pathname(file, py) == NULL) {
-        strncpy(py, file, len-1);
-        py[len-1] = '\0';
+    py = make_source_pathname(filename);
+    if (py == NULL) {
+        PyErr_Clear();
+        py = PyUnicode_FromUnicode(fileuni, len - 1);
     }
-
-    if (stat(py, &statbuf) == 0 &&
-        S_ISREG(statbuf.st_mode)) {
-        u = PyUnicode_DecodeFSDefault(py);
-    }
-    else {
-        u = PyUnicode_DecodeFSDefault(file);
-    }
-    return u;
+    if (py == NULL)
+        goto error;
+
+    if (_Py_stat(py, &statbuf) == 0 && S_ISREG(statbuf.st_mode))
+        return py;
+    Py_DECREF(py);
+    goto unchanged;
+
+error:
+    PyErr_Clear();
+unchanged:
+    Py_INCREF(filename);
+    return filename;
 }
 
 /* Forward */
@@ -1436,54 +1469,56 @@
    REFERENCE COUNT */
 
 static PyObject *
-load_package(char *name, char *pathname)
+load_package(PyObject *name, PyObject *pathname)
 {
     PyObject *m, *d;
-    PyObject *file = NULL;
-    PyObject *path = NULL;
+    PyObject *file = NULL, *path_list = NULL;
     int err;
     char buf[MAXPATHLEN+1];
-    FILE *fp;
+    FILE *fp = NULL;
     struct filedescr *fdp;
-
-    m = PyImport_AddModule(name);
+    char *namestr;
+
+    m = PyImport_AddModuleObject(name);
     if (m == NULL)
         return NULL;
     if (Py_VerboseFlag)
-        PySys_WriteStderr("import %s # directory %s\n",
+        PySys_FormatStderr("import %U # directory %U\n",
             name, pathname);
-    d = PyModule_GetDict(m);
     file = get_sourcefile(pathname);
     if (file == NULL)
+        return NULL;
+    path_list = Py_BuildValue("[O]", file);
+    if (path_list == NULL) {
+        Py_DECREF(file);
+        return NULL;
+    }
+    d = PyModule_GetDict(m);
+    err = PyDict_SetItemString(d, "__file__", file);
+    Py_DECREF(file);
+    if (err == 0)
+        err = PyDict_SetItemString(d, "__path__", path_list);
+    if (err != 0) {
+        Py_DECREF(path_list);
+        return NULL;
+    }
+    namestr = _PyUnicode_AsString(name);
+    if (namestr == NULL)
         goto error;
-    path = Py_BuildValue("[O]", file);
-    if (path == NULL)
-        goto error;
-    err = PyDict_SetItemString(d, "__file__", file);
-    if (err == 0)
-        err = PyDict_SetItemString(d, "__path__", path);
-    if (err != 0)
-        goto error;
-    fdp = find_module(name, "__init__", path, buf, sizeof(buf), &fp, NULL);
+    fdp = find_module(namestr, "__init__", path_list, buf, sizeof(buf), &fp, NULL);
+    Py_DECREF(path_list);
     if (fdp == NULL) {
         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
             PyErr_Clear();
             Py_INCREF(m);
+            return m;
         }
         else
-            m = NULL;
-        goto cleanup;
+            return NULL;
     }
-    m = load_module(name, fp, buf, fdp->type, NULL);
+    m = load_module(namestr, fp, buf, fdp->type, NULL);
     if (fp != NULL)
         fclose(fp);
-    goto cleanup;
-
-  error:
-    m = NULL;
-  cleanup:
-    Py_XDECREF(path);
-    Py_XDECREF(file);
     return m;
 }
 
@@ -2282,9 +2317,21 @@
     }
 #endif
 
-    case PKG_DIRECTORY:
-        m = load_package(name, pathname);
+    case PKG_DIRECTORY: {
+        PyObject *nameobj, *pathobj;
+        nameobj = PyUnicode_FromString(name);
+        if (nameobj == NULL)
+            return NULL;
+        pathobj = PyUnicode_DecodeFSDefault(pathname);
+        if (pathobj == NULL) {
+            Py_DECREF(nameobj);
+            return NULL;
+        }
+        m = load_package(nameobj, pathobj);
+        Py_DECREF(nameobj);
+        Py_DECREF(pathobj);
         break;
+    }
 
     case C_BUILTIN:
     case PY_FROZEN: {
@@ -3637,13 +3684,12 @@
 static PyObject *
 imp_load_package(PyObject *self, PyObject *args)
 {
-    char *name;
-    PyObject *pathname;
+    PyObject *name, *pathname;
     PyObject * ret;
-    if (!PyArg_ParseTuple(args, "sO&:load_package",
-                          &name, PyUnicode_FSConverter, &pathname))
+    if (!PyArg_ParseTuple(args, "UO&:load_package",
+                          &name, PyUnicode_FSDecoder, &pathname))
         return NULL;
-    ret = load_package(name, PyBytes_AS_STRING(pathname));
+    ret = load_package(name, pathname);
     Py_DECREF(pathname);
     return ret;
 }
@@ -3716,25 +3762,22 @@
 imp_source_from_cache(PyObject *self, PyObject *args, PyObject *kws)
 {
     static char *kwlist[] = {"path", NULL};
-
-    PyObject *pathname_obj;
-    char *pathname;
-    char buf[MAXPATHLEN+1];
+    PyObject *pathname, *source;
 
     if (!PyArg_ParseTupleAndKeywords(
                 args, kws, "O&", kwlist,
-                PyUnicode_FSConverter, &pathname_obj))
+                PyUnicode_FSDecoder, &pathname))
         return NULL;
 
-    pathname = PyBytes_AS_STRING(pathname_obj);
-    if (make_source_pathname(pathname, buf) == NULL) {
-        PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %s",
+    source = make_source_pathname(pathname);
+    if (source == NULL) {
+        PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %R",
                      pathname);
-        Py_DECREF(pathname_obj);
+        Py_DECREF(pathname);
         return NULL;
     }
-    Py_DECREF(pathname_obj);
-    return PyUnicode_FromString(buf);
+    Py_DECREF(pathname);
+    return source;
 }
 
 PyDoc_STRVAR(doc_source_from_cache,

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list