[Python-checkins] cpython (merge default -> default): Merge

Fri Jul 13 21:01:26 CEST 2012

http://hg.python.org/cpython/rev/482cff0eebda
changeset:   78082:482cff0eebda
parent:      78081:7d5e84a44b82
parent:      78079:425d5e89fa25
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Fri Jul 13 20:59:35 2012 +0200
summary:
  Merge

files:
  Doc/c-api/import.rst        |     7 +-
  Lib/imp.py                  |    25 +-
  Lib/importlib/_bootstrap.py |    44 +
  Misc/NEWS                   |     9 +
  Python/import.c             |   195 +-
  Python/importlib.h          |  7865 +++++++++++-----------
  Tools/scripts/highlight.py  |    60 +-
  7 files changed, 4110 insertions(+), 4095 deletions(-)

diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst
--- a/Doc/c-api/import.rst
+++ b/Doc/c-api/import.rst
@@ -163,9 +163,14 @@
 .. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
 
    Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
-   *cpathname* are UTF-8 encoded strings.
+   *cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
+   what the value for *pathname* should be from *cpathname* if the former is
+   set to ``NULL``.
 
    .. versionadded:: 3.2
+   .. versionchanged:: 3.3
+      Uses :func:`imp.source_from_cache()` in calculating the source path if
+      only the bytecode path is provided.
 
 
 .. c:function:: long PyImport_GetMagicNumber()
diff --git a/Lib/imp.py b/Lib/imp.py
--- a/Lib/imp.py
+++ b/Lib/imp.py
@@ -13,7 +13,7 @@
 
 # Directly exposed by this module
 from importlib._bootstrap import new_module
-from importlib._bootstrap import cache_from_source
+from importlib._bootstrap import cache_from_source, source_from_cache
 
 
 from importlib import _bootstrap
@@ -58,29 +58,6 @@
     return extensions + source + bytecode
 
 
-def source_from_cache(path):
-    """Given the path to a .pyc./.pyo file, return the path to its .py file.
-
-    The .pyc/.pyo file does not need to exist; this simply returns the path to
-    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
-    not conform to PEP 3147 format, ValueError will be raised. If
-    sys.implementation.cache_tag is None then NotImplementedError is raised.
-
-    """
-    if sys.implementation.cache_tag is None:
-        raise NotImplementedError('sys.implementation.cache_tag is None')
-    head, pycache_filename = os.path.split(path)
-    head, pycache = os.path.split(head)
-    if pycache != _bootstrap._PYCACHE:
-        raise ValueError('{} not bottom-level directory in '
-                         '{!r}'.format(_bootstrap._PYCACHE, path))
-    if pycache_filename.count('.') != 2:
-        raise ValueError('expected only 2 dots in '
-                         '{!r}'.format(pycache_filename))
-    base_filename = pycache_filename.partition('.')[0]
-    return os.path.join(head, base_filename + machinery.SOURCE_SUFFIXES[0])
-
-
 class NullImporter:
 
     """Null import object."""
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -428,6 +428,50 @@
     return _path_join(head, _PYCACHE, filename)
 
 
+def source_from_cache(path):
+    """Given the path to a .pyc/.pyo file, return the path to its .py file.
+
+    The .pyc/.pyo file does not need to exist; this simply returns the path to
+    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
+    not conform to PEP 3147 format, ValueError will be raised. If
+    sys.implementation.cache_tag is None then NotImplementedError is raised.
+
+    """
+    if sys.implementation.cache_tag is None:
+        raise NotImplementedError('sys.implementation.cache_tag is None')
+    head, pycache_filename = _path_split(path)
+    head, pycache = _path_split(head)
+    if pycache != _PYCACHE:
+        raise ValueError('{} not bottom-level directory in '
+                         '{!r}'.format(_PYCACHE, path))
+    if pycache_filename.count('.') != 2:
+        raise ValueError('expected only 2 dots in '
+                         '{!r}'.format(pycache_filename))
+    base_filename = pycache_filename.partition('.')[0]
+    return _path_join(head, base_filename + SOURCE_SUFFIXES[0])
+
+
+def _get_sourcefile(bytecode_path):
+    """Convert a bytecode file path to a source path (if possible).
+
+    Return None for an empty path, the source path when that file exists,
+    and bytecode_path itself otherwise.  This function exists purely for
+    backwards-compatibility for PyImport_ExecCodeModuleWithPathnames() in
+    the C API.
+    """
+    if len(bytecode_path) == 0:
+        return None
+    rest, _, extension = bytecode_path.rpartition('.')
+    # Only *.py? names (e.g. .pyc/.pyo, any case) are candidates.
+    if not rest or len(extension) != 3 or extension[:2].lower() != 'py':
+        return bytecode_path
+    try:
+        source_path = source_from_cache(bytecode_path)
+    except (NotImplementedError, ValueError):
+        source_path = bytecode_path[:-1]  # Legacy layout: foo.pyc -> foo.py
+    return source_path if _path_isfile(source_path) else bytecode_path
+
+
 def _verbose_message(message, *args):
     """Print the message to stderr if -v/PYTHONVERBOSE is turned on."""
     if sys.flags.verbose:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -92,6 +92,15 @@
 - Issue 10924: Fixed mksalt() to use a RNG that is suitable for cryptographic
   purpose.
 
+C API
+-----
+
+- Issues #15169, #14599: Strip out the C implementation of
+  imp.source_from_cache() used by PyImport_ExecCodeModuleWithPathnames() and
+  use the Python code instead. As a result, PyImport_ExecCodeModuleObject() no
+  longer tries to infer the source path from the bytecode path as
+  PyImport_ExecCodeModuleWithPathnames() does.
+
 Extension Modules
 -----------------
 
diff --git a/Python/import.c b/Python/import.c
--- a/Python/import.c
+++ b/Python/import.c
@@ -630,8 +630,6 @@
                       "sys.modules failed");
 }
 
-static PyObject * get_sourcefile(PyObject *filename);
-static PyObject *make_source_pathname(PyObject *pathname);
 
 /* Execute a code object in a module and return the module object
  * WITH INCREMENTED REFERENCE COUNT.  If an error occurs, name is
@@ -668,18 +666,37 @@
     if (nameobj == NULL)
         return NULL;
 
+    if (cpathname != NULL) {
+        cpathobj = PyUnicode_DecodeFSDefault(cpathname);
+        if (cpathobj == NULL)
+            goto error;
+    }
+    else
+        cpathobj = NULL;
+
     if (pathname != NULL) {
         pathobj = PyUnicode_DecodeFSDefault(pathname);
         if (pathobj == NULL)
             goto error;
-    } else
+    }
+    else if (cpathobj != NULL) {
+        PyInterpreterState *interp = PyThreadState_GET()->interp;
+        _Py_IDENTIFIER(_get_sourcefile);
+
+        if (interp == NULL) {
+            Py_FatalError("PyImport_ExecCodeModuleWithPathnames: "
+                          "no interpreter!");
+        }
+
+        pathobj = _PyObject_CallMethodObjIdArgs(interp->importlib,
+                                                &PyId__get_sourcefile, cpathobj,
+                                                NULL);
+        if (pathobj == NULL)
+            PyErr_Clear();
+    }
+    else
         pathobj = NULL;
-    if (cpathname != NULL) {
-        cpathobj = PyUnicode_DecodeFSDefault(cpathname);
-        if (cpathobj == NULL)
-            goto error;
-    } else
-        cpathobj = NULL;
+
     m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj);
 error:
     Py_DECREF(nameobj);
@@ -706,18 +723,13 @@
                                  PyEval_GetBuiltins()) != 0)
             goto error;
     }
-    /* Remember the filename as the __file__ attribute */
     if (pathname != NULL) {
-        v = get_sourcefile(pathname);
-        if (v == NULL)
-            PyErr_Clear();
+        v = pathname;
     }
-    else
-        v = NULL;
-    if (v == NULL) {
+    else {
         v = ((PyCodeObject *)co)->co_filename;
-        Py_INCREF(v);
     }
+    Py_INCREF(v);
     if (PyDict_SetItemString(d, "__file__", v) != 0)
         PyErr_Clear(); /* Not important enough to report */
     Py_DECREF(v);
@@ -752,100 +764,6 @@
 }
 
 
-/* Like rightmost_sep, but operate on unicode objects. */
-static Py_ssize_t
-rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
-{
-    Py_ssize_t found, i;
-    Py_UCS4 c;
-    for (found = -1, i = start; i < end; i++) {
-        c = PyUnicode_READ_CHAR(o, i);
-        if (c == SEP
-#ifdef ALTSEP
-            || c == ALTSEP
-#endif
-            )
-        {
-            found = i;
-        }
-    }
-    return found;
-}
-
-
-/* Given a pathname to a Python byte compiled file, return the path to the
-   source file, if the path matches the PEP 3147 format.  This does not check
-   for any file existence, however, if the pyc file name does not match PEP
-   3147 style, NULL is returned.  buf must be at least as big as pathname;
-   the resulting path will always be shorter.
-
-   (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
-
-static PyObject*
-make_source_pathname(PyObject *path)
-{
-    Py_ssize_t left, right, dot0, dot1, len;
-    Py_ssize_t i, j;
-    PyObject *result;
-    int kind;
-    void *data;
-
-    len = PyUnicode_GET_LENGTH(path);
-    if (len > MAXPATHLEN)
-        return NULL;
-
-    /* Look back two slashes from the end.  In between these two slashes
-       must be the string __pycache__ or this is not a PEP 3147 style
-       path.  It's possible for there to be only one slash.
-    */
-    right = rightmost_sep_obj(path, 0, len);
-    if (right == -1)
-        return NULL;
-    left = rightmost_sep_obj(path, 0, right);
-    if (left == -1)
-        left = 0;
-    else
-        left++;
-    if (right-left !=  sizeof(CACHEDIR)-1)
-        return NULL;
-    for (i = 0; i < sizeof(CACHEDIR)-1; i++)
-        if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
-            return NULL;
-
-    /* Now verify that the path component to the right of the last slash
-       has two dots in it.
-    */
-    dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
-    if (dot0 < 0)
-        return NULL;
-    dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
-    if (dot1 < 0)
-        return NULL;
-    /* Too many dots? */
-    if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
-        return NULL;
-
-    /* This is a PEP 3147 path.  Start by copying everything from the
-       start of pathname up to and including the leftmost slash.  Then
-       copy the file's basename, removing the magic tag and adding a .py
-       suffix.
-    */
-    result = PyUnicode_New(left + (dot0-right) + 2,
-                           PyUnicode_MAX_CHAR_VALUE(path));
-    if (!result)
-        return NULL;
-    kind = PyUnicode_KIND(result);
-    data = PyUnicode_DATA(result);
-    PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
-    PyUnicode_CopyCharacters(result, left, path, right+1,
-                             (j = dot0-right));
-    PyUnicode_WRITE(kind, data, i+j,   'p');
-    PyUnicode_WRITE(kind, data, i+j+1, 'y');
-    assert(_PyUnicode_CheckConsistency(result, 1));
-    return result;
-}
-
-
 static void
 update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname)
 {
@@ -911,61 +829,6 @@
 }
 
 
-/* Get source file -> unicode or None
- * Returns the path to the py file if available, else the given path
- */
-static PyObject *
-get_sourcefile(PyObject *filename)
-{
-    Py_ssize_t len;
-    PyObject *py;
-    struct stat statbuf;
-    int err;
-    void *data;
-    unsigned int kind;
-
-    len = PyUnicode_GET_LENGTH(filename);
-    if (len == 0)
-        Py_RETURN_NONE;
-
-    /* don't match *.pyc or *.pyo? */
-    data = PyUnicode_DATA(filename);
-    kind = PyUnicode_KIND(filename);
-    if (len < 5
-        || PyUnicode_READ(kind, data, len-4) != '.'
-        || (PyUnicode_READ(kind, data, len-3) != 'p'
-            && PyUnicode_READ(kind, data, len-3) != 'P')
-        || (PyUnicode_READ(kind, data, len-2) != 'y'
-            && PyUnicode_READ(kind, data, len-2) != 'Y'))
-        goto unchanged;
-
-    /* Start by trying to turn PEP 3147 path into source path.  If that
-     * fails, just chop off the trailing character, i.e. legacy pyc path
-     * to py.
-     */
-    py = make_source_pathname(filename);
-    if (py == NULL) {
-        PyErr_Clear();
-        py = PyUnicode_Substring(filename, 0, len - 1);
-    }
-    if (py == NULL)
-        goto error;
-
-    err = _Py_stat(py, &statbuf);
-    if (err == -2)
-        goto error;
-    if (err == 0 && S_ISREG(statbuf.st_mode))
-        return py;
-    Py_DECREF(py);
-    goto unchanged;
-
-error:
-    PyErr_Clear();
-unchanged:
-    Py_INCREF(filename);
-    return filename;
-}
-
 /* Forward */
 static struct _frozen * find_frozen(PyObject *);
 
diff --git a/Python/importlib.h b/Python/importlib.h
--- a/Python/importlib.h
+++ b/Python/importlib.h
[stripped]
diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py
--- a/Tools/scripts/highlight.py
+++ b/Tools/scripts/highlight.py
@@ -4,12 +4,16 @@
 __author__ = 'Raymond Hettinger'
 
 import keyword, tokenize, cgi, re, functools
+try:
+    import builtins
+except ImportError:
+    import __builtin__ as builtins
 
 #### Analyze Python Source #################################
 
 def is_builtin(s):
     'Return True if s is the name of a builtin'
-    return hasattr(__builtins__, s)
+    return hasattr(builtins, s)
 
 def combine_range(lines, start, end):
     'Join content from a range of lines between start and end'
@@ -21,9 +25,7 @@
 
 def analyze_python(source):
     '''Generate and classify chunks of Python for syntax highlighting.
-       Yields tuples in the form: (leadin_text, category, categorized_text).
-       The final tuple has empty strings for the category and categorized text.
-
+       Yields tuples in the form: (category, categorized_text).
     '''
     lines = source.splitlines(True)
     lines.append('')
@@ -37,7 +39,7 @@
         kind = ''
         if tok_type == tokenize.COMMENT:
             kind = 'comment'
-        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;':
+        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
             kind = 'operator'
         elif tok_type == tokenize.STRING:
             kind = 'string'
@@ -53,22 +55,20 @@
             elif is_builtin(tok_str) and prev_tok_str != '.':
                 kind = 'builtin'
         if kind:
-            line_upto_token, written = combine_range(lines, written, (srow, scol))
-            line_thru_token, written = combine_range(lines, written, (erow, ecol))
-            yield line_upto_token, kind, line_thru_token
+            text, written = combine_range(lines, written, (srow, scol))
+            yield '', text
+            text, written = combine_range(lines, written, (erow, ecol))
+            yield kind, text
     line_upto_token, written = combine_range(lines, written, (erow, ecol))
-    yield line_upto_token, '', ''
+    yield '', line_upto_token
 
 #### Raw Output  ###########################################
 
 def raw_highlight(classified_text):
     'Straight text display of text classifications'
     result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
-        if line_upto_token:
-            result.append('          plain:  %r\n' % line_upto_token)
-        if line_thru_token:
-            result.append('%15s:  %r\n' % (kind, line_thru_token))
+    for kind, text in classified_text:
+        result.append('%15s:  %r\n' % (kind or 'plain', text))
     return ''.join(result)
 
 #### ANSI Output ###########################################
@@ -88,9 +88,9 @@
     'Add syntax highlighting to source code using ANSI escape sequences'
     # http://en.wikipedia.org/wiki/ANSI_escape_code
     result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
         opener, closer = colors.get(kind, ('', ''))
-        result += [line_upto_token, opener, line_thru_token, closer]
+        result += [opener, text, closer]
     return ''.join(result)
 
 #### HTML Output ###########################################
@@ -98,16 +98,13 @@
 def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
     'Convert classified text to an HTML fragment'
     result = [opener]
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
         if kind:
-            result += [cgi.escape(line_upto_token),
-                       '<span class="%s">' % kind,
-                       cgi.escape(line_thru_token),
-                       '</span>']
-        else:
-            result += [cgi.escape(line_upto_token),
-                       cgi.escape(line_thru_token)]
-    result += [closer]
+            result.append('<span class="%s">' % kind)
+        result.append(cgi.escape(text))
+        if kind:
+            result.append('</span>')
+    result.append(closer)
     return ''.join(result)
 
 default_css = {
@@ -188,15 +185,12 @@
                     document = default_latex_document):
     'Create a complete LaTeX document with colorized source code'
     result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
         if kind:
-            result += [latex_escape(line_upto_token),
-                       r'{\color{%s}' % colors[kind],
-                       latex_escape(line_thru_token),
-                       '}']
-        else:
-            result += [latex_escape(line_upto_token),
-                       latex_escape(line_thru_token)]
+            result.append(r'{\color{%s}' % colors[kind])
+        result.append(latex_escape(text))
+        if kind:
+            result.append('}')
     return default_latex_document % dict(title=title, body=''.join(result))
 
 

-- 
Repository URL: http://hg.python.org/cpython