[Pytest-commit] commit/pytest: gutworth: in the default Python 2 case, manually check the source is ASCII (fixes #269)
commits-noreply at bitbucket.org
commits-noreply at bitbucket.org
Fri Mar 8 16:45:23 CET 2013
1 new commit in pytest:
https://bitbucket.org/hpk42/pytest/commits/62e3666c1cec/
changeset: 62e3666c1cec
user: gutworth
date: 2013-03-08 16:44:41
summary: in the default Python 2 case, manually check the source is ASCII (fixes #269)
affected #: 3 files
diff -r a4ad4a06149f957d921d1f9a4e33ed88ddf7cf07 -r 62e3666c1cec8bdd6767dd207a153cb23f2a9ef3 CHANGELOG
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,8 @@
Changes between 2.3.4 and 2.3.5dev
-----------------------------------
+- issue 259 - when assertion rewriting, be consistent with the default
+ source encoding of ASCII on Python 2
+
- issue 251 - report a skip instead of ignoring classes with init
- issue250 unicode/str mixes in parametrization names and values now works
diff -r a4ad4a06149f957d921d1f9a4e33ed88ddf7cf07 -r 62e3666c1cec8bdd6767dd207a153cb23f2a9ef3 _pytest/assertion/rewrite.py
--- a/_pytest/assertion/rewrite.py
+++ b/_pytest/assertion/rewrite.py
@@ -6,6 +6,7 @@
import imp
import marshal
import os
+import re
import struct
import sys
import types
@@ -38,6 +39,7 @@
PYC_TAIL = "." + PYTEST_TAG + PYC_EXT
REWRITE_NEWLINES = sys.version_info[:2] != (2, 7) and sys.version_info < (3, 2)
+ASCII_IS_DEFAULT_ENCODING = sys.version_info[0] < 3
class AssertionRewritingHook(object):
"""PEP302 Import hook which rewrites asserts."""
@@ -187,12 +189,37 @@
RN = "\r\n".encode("utf-8")
N = "\n".encode("utf-8")
+cookie_re = re.compile("coding[:=]\s*[-\w.]+")
+BOM_UTF8 = '\xef\xbb\xbf'
+
def _rewrite_test(state, fn):
"""Try to read and rewrite *fn* and return the code object."""
try:
source = fn.read("rb")
except EnvironmentError:
return None
+ if ASCII_IS_DEFAULT_ENCODING:
+ # ASCII is the default encoding in Python 2. Without a coding
+ # declaration, Python 2 will complain about any bytes in the file
+ # outside the ASCII range. Sadly, this behavior does not extend to
+ # compile() or ast.parse(), which prefer to interpret the bytes as
+ # latin-1. (At least they properly handle explicit coding cookies.) To
+ # preserve this error behavior, we could force ast.parse() to use ASCII
+ # as the encoding by inserting a coding cookie. Unfortunately, that
+ # messes up line numbers. Thus, we have to check ourselves if anything
+ # is outside the ASCII range in the case no encoding is explicitly
+ # declared. For more context, see issue #269. Yay for Python 3 which
+ # gets this right.
+ end1 = source.find("\n")
+ end2 = source.find("\n", end1 + 1)
+ if (not source.startswith(BOM_UTF8) and
+ (not cookie_re.match(source[0:end1]) or
+ not cookie_re.match(source[end1:end2]))):
+ try:
+ source.decode("ascii")
+ except UnicodeDecodeError:
+ # Let it fail in real import.
+ return None
# On Python versions which are not 2.7 and less than or equal to 3.1, the
# parser expects *nix newlines.
if REWRITE_NEWLINES:
diff -r a4ad4a06149f957d921d1f9a4e33ed88ddf7cf07 -r 62e3666c1cec8bdd6767dd207a153cb23f2a9ef3 testing/test_assertrewrite.py
--- a/testing/test_assertrewrite.py
+++ b/testing/test_assertrewrite.py
@@ -394,3 +394,11 @@
b = content.encode("utf-8")
testdir.tmpdir.join("test_newlines.py").write(b, "wb")
assert testdir.runpytest().ret == 0
+
+ @pytest.mark.skipif("sys.version_info[0] >= 3")
+ def test_assume_ascii(self, testdir):
+ content = "u'\xe2\x99\xa5'"
+ testdir.tmpdir.join("test_encoding.py").write(content, "wb")
+ res = testdir.runpytest()
+ assert res.ret != 0
+ assert "SyntaxError: Non-ASCII character" in res.stdout.str()
Repository URL: https://bitbucket.org/hpk42/pytest/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled and are listed as a recipient
of this email.
More information about the pytest-commit mailing list