[pypy-commit] pypy py3k: autodetect the encoding and use it to open the file when calling imp.find_module
antocuni
noreply at buildbot.pypy.org
Tue Feb 28 16:31:37 CET 2012
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: py3k
Changeset: r52984:7f5530c7685c
Date: 2012-02-28 16:03 +0100
http://bitbucket.org/pypy/pypy/changeset/7f5530c7685c/
Log: autodetect the encoding and use it to open the file when calling
imp.find_module
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -2,6 +2,7 @@
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter.module import Module
from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter.pyparser import pytokenizer
from pypy.objspace.std import unicodetype
from pypy.rlib import streamio
from pypy.module._io.interp_iobase import W_IOBase
@@ -65,12 +66,20 @@
stream = find_info.stream
if stream is not None:
- fd = stream.try_to_find_file_descriptor()
+ # try to find the declared encoding
+ encoding = None
+ firstline = stream.readline()
+ stream.seek(0, 0) # reset position
+ if firstline.startswith('#'):
+ encoding = pytokenizer.match_encoding_declaration(firstline)
+ if encoding is None:
+ encoding = unicodetype.getdefaultencoding(space)
+ #
# in python2, both CPython and PyPy pass the filename to
# open(). However, CPython 3 just passes the fd, so the returned file
# object doesn't have a name attached. We do the same in PyPy, because
# there is no easy way to attach the filename -- too bad
- encoding = unicodetype.getdefaultencoding(space)
+ fd = stream.try_to_find_file_descriptor()
w_fileobj = interp_io.open(space, space.wrap(fd), find_info.filemode,
encoding=encoding)
else:
diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py
--- a/pypy/module/imp/test/test_app.py
+++ b/pypy/module/imp/test/test_app.py
@@ -1,10 +1,16 @@
from __future__ import with_statement
+from pypy.tool.udir import udir
MARKER = 42
class AppTestImpModule:
def setup_class(cls):
cls.w_imp = cls.space.getbuiltinmodule('imp')
cls.w_file_module = cls.space.wrap(__file__)
+ latin1 = udir.join('latin1.py')
+ latin1.write("# -*- coding: iso-8859-1 -*\n")
+ fake_latin1 = udir.join('fake_latin1.py')
+ fake_latin1.write("print('-*- coding: iso-8859-1 -*')")
+ cls.w_udir = cls.space.wrap(str(udir))
def w__py_file(self):
fn = self.file_module
@@ -33,6 +39,18 @@
assert pathname.endswith('.py') # even if .pyc is up-to-date
assert description in self.imp.get_suffixes()
+ def test_find_module_with_encoding(self):
+ import sys
+ sys.path.insert(0, self.udir)
+ try:
+ file, pathname, description = self.imp.find_module('latin1')
+ assert file.encoding == 'iso-8859-1'
+ #
+ file, pathname, description = self.imp.find_module('fake_latin1')
+ assert file.encoding == 'utf-8'
+ finally:
+ del sys.path[0]
+
def test_load_dynamic(self):
raises(ImportError, self.imp.load_dynamic, 'foo', 'bar')
raises(ImportError, self.imp.load_dynamic, 'foo', 'bar',
More information about the pypy-commit
mailing list