[pypy-commit] pypy py3k: autodetect the encoding and use it to open the file when calling imp.find_module

antocuni noreply at buildbot.pypy.org
Tue Feb 28 16:31:37 CET 2012


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: py3k
Changeset: r52984:7f5530c7685c
Date: 2012-02-28 16:03 +0100
http://bitbucket.org/pypy/pypy/changeset/7f5530c7685c/

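Log:	autodetect the encoding declared on the first line of the source
	file and use it to open the file when calling imp.find_module; fall
	back to the default encoding when no declaration is found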

diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -2,6 +2,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.module import Module
 from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.pyparser import pytokenizer
 from pypy.objspace.std import unicodetype
 from pypy.rlib import streamio
 from pypy.module._io.interp_iobase import W_IOBase
@@ -65,12 +66,20 @@
     stream = find_info.stream
 
     if stream is not None:
-        fd = stream.try_to_find_file_descriptor()
+        # try to find the declared encoding
+        encoding = None
+        firstline = stream.readline()
+        stream.seek(0, 0) # reset position
+        if firstline.startswith('#'):
+            encoding = pytokenizer.match_encoding_declaration(firstline)
+        if encoding is None:
+            encoding = unicodetype.getdefaultencoding(space)
+        #
         # in python2, both CPython and PyPy pass the filename to
         # open(). However, CPython 3 just passes the fd, so the returned file
         # object doesn't have a name attached. We do the same in PyPy, because
         # there is no easy way to attach the filename -- too bad
-        encoding = unicodetype.getdefaultencoding(space)
+        fd = stream.try_to_find_file_descriptor()
         w_fileobj = interp_io.open(space, space.wrap(fd), find_info.filemode,
                                    encoding=encoding)
     else:
diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py
--- a/pypy/module/imp/test/test_app.py
+++ b/pypy/module/imp/test/test_app.py
@@ -1,10 +1,16 @@
 from __future__ import with_statement
+from pypy.tool.udir import udir
 MARKER = 42
 
 class AppTestImpModule:
     def setup_class(cls):
         cls.w_imp = cls.space.getbuiltinmodule('imp')
         cls.w_file_module = cls.space.wrap(__file__)
+        latin1 = udir.join('latin1.py')
+        latin1.write("# -*- coding: iso-8859-1 -*\n")
+        fake_latin1 = udir.join('fake_latin1.py')
+        fake_latin1.write("print('-*- coding: iso-8859-1 -*')")
+        cls.w_udir = cls.space.wrap(str(udir))
 
     def w__py_file(self):
         fn = self.file_module
@@ -33,6 +39,18 @@
         assert pathname.endswith('.py') # even if .pyc is up-to-date
         assert description in self.imp.get_suffixes()
 
+    def test_find_module_with_encoding(self):
+        import sys
+        sys.path.insert(0, self.udir)
+        try:
+            file, pathname, description = self.imp.find_module('latin1')
+            assert file.encoding == 'iso-8859-1'
+            #
+            file, pathname, description = self.imp.find_module('fake_latin1')
+            assert file.encoding == 'utf-8'
+        finally:
+            del sys.path[0]
+
     def test_load_dynamic(self):
         raises(ImportError, self.imp.load_dynamic, 'foo', 'bar')
         raises(ImportError, self.imp.load_dynamic, 'foo', 'bar',


More information about the pypy-commit mailing list