[Python-checkins] cpython: Issue #18076: Introduce importlib.util.decode_source().

brett.cannon python-checkins at python.org
Mon Jun 17 00:38:02 CEST 2013


http://hg.python.org/cpython/rev/bdd60bedf933
changeset:   84173:bdd60bedf933
user:        Brett Cannon <brett at python.org>
date:        Sun Jun 16 18:37:53 2013 -0400
summary:
  Issue #18076: Introduce importlib.util.decode_source().

The helper function makes it easier to implement
importlib.abc.InspectLoader.get_source() by making that function
require just the raw bytes for source code and handling all other
details.

files:
  Doc/library/importlib.rst            |     8 +
  Lib/importlib/_bootstrap.py          |    18 +-
  Lib/importlib/util.py                |     1 +
  Lib/test/test_importlib/test_util.py |    21 +
  Misc/NEWS                            |     2 +
  Python/importlib.h                   |  7149 +++++++------
  6 files changed, 3628 insertions(+), 3571 deletions(-)


diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -916,6 +916,14 @@
 
    .. versionadded:: 3.4
 
+.. function:: decode_source(source_bytes)
+
+   Decode the given bytes representing source code and return it as a string
+   with universal newlines (as required by
+   :meth:`importlib.abc.InspectLoader.get_source`).
+
+   .. versionadded:: 3.4
+
 .. function:: resolve_name(name, package)
 
    Resolve a relative module name to an absolute one.
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -723,6 +723,18 @@
     return data
 
 
+def decode_source(source_bytes):
+    """Decode bytes representing source code and return the string.
+
+    Universal newline support is used in the decoding.
+    """
+    import tokenize  # To avoid bootstrap issues.
+    source_bytes_readline = _io.BytesIO(source_bytes).readline
+    encoding = tokenize.detect_encoding(source_bytes_readline)
+    newline_decoder = _io.IncrementalNewlineDecoder(None, True)
+    return newline_decoder.decode(source_bytes.decode(encoding[0]))
+
+
 # Loaders #####################################################################
 
 class BuiltinImporter:
@@ -965,11 +977,7 @@
         except OSError as exc:
             raise ImportError("source not available through get_data()",
                               name=fullname) from exc
-        import tokenize
-        readsource = _io.BytesIO(source_bytes).readline
-        encoding = tokenize.detect_encoding(readsource)
-        newline_decoder = _io.IncrementalNewlineDecoder(None, True)
-        return newline_decoder.decode(source_bytes.decode(encoding[0]))
+        return decode_source(source_bytes)
 
     def source_to_code(self, data, path, *, _optimize=-1):
         """Return the code object compiled from source.
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py
--- a/Lib/importlib/util.py
+++ b/Lib/importlib/util.py
@@ -2,6 +2,7 @@
 
 from ._bootstrap import MAGIC_NUMBER
 from ._bootstrap import cache_from_source
+from ._bootstrap import decode_source
 from ._bootstrap import module_to_load
 from ._bootstrap import set_loader
 from ._bootstrap import set_package
diff --git a/Lib/test/test_importlib/test_util.py b/Lib/test/test_importlib/test_util.py
--- a/Lib/test/test_importlib/test_util.py
+++ b/Lib/test/test_importlib/test_util.py
@@ -9,6 +9,27 @@
 import warnings
 
 
+class DecodeSourceBytesTests(unittest.TestCase):
+
+    source = "string ='ü'"
+
+    def test_ut8_default(self):
+        source_bytes = self.source.encode('utf-8')
+        self.assertEqual(util.decode_source(source_bytes), self.source)
+
+    def test_specified_encoding(self):
+        source = '# coding=latin-1\n' + self.source
+        source_bytes = source.encode('latin-1')
+        assert source_bytes != source.encode('utf-8')
+        self.assertEqual(util.decode_source(source_bytes), source)
+
+    def test_universal_newlines(self):
+        source = '\r\n'.join([self.source, self.source])
+        source_bytes = source.encode('utf-8')
+        self.assertEqual(util.decode_source(source_bytes),
+                         '\n'.join([self.source, self.source]))
+
+
 class ModuleToLoadTests(unittest.TestCase):
 
     module_name = 'ModuleManagerTest_module'
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -123,6 +123,8 @@
 Library
 -------
 
+- Issue #18076: Introduce importlib.util.decode_source().
+
 - importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or
   UnicodeDecodeError into ImportError.
 
diff --git a/Python/importlib.h b/Python/importlib.h
--- a/Python/importlib.h
+++ b/Python/importlib.h
[stripped]

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list