[pypy-commit] pypy py3.6: (tomek.dziopa ronan) add deprecationwarning for incorrectly escaped strings
tdziopa
pypy.commits at gmail.com
Sat Apr 21 20:18:04 EDT 2018
Author: Tomasz Dziopa <tomek.dziopa at gmail.com>
Branch: py3.6
Changeset: r94408:bce4e51f4d12
Date: 2018-04-21 22:48 +0100
http://bitbucket.org/pypy/pypy/changeset/bce4e51f4d12/
Log: (tomek.dziopa ronan) add a DeprecationWarning for incorrectly escaped
strings
CPython changeset https://hg.python.org/cpython/rev/ee82266ad35b added verbose
warnings for incorrectly escaped string literals. This change aligns
PyPy's behaviour with CPython's in this respect.
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -114,7 +114,13 @@
v = unicodehelper.decode_utf8(space, substr)
return space.newunicode(v)
- v = PyString_DecodeEscape(space, substr, 'strict', encoding)
+ v, first_escape_error_position = PyString_DecodeEscape(
+ space, substr, 'strict', encoding)
+
+ if first_escape_error_position is not None:
+ space.warn("invalid excape sequence '\\%c'" % ch,
+ space.w_DeprecationWarning)
+
return space.newbytes(v)
def decode_unicode_utf8(space, s, ps, q):
@@ -158,6 +164,7 @@
builder = StringBuilder(len(s))
ps = 0
end = len(s)
+ first_escape_error_position = None
while ps < end:
if s[ps] != '\\':
# note that the C code has a label here.
@@ -237,11 +244,13 @@
builder.append('\\')
ps -= 1
assert ps >= 0
+ if first_escape_error_position is None:
+ first_escape_error_position = ps
continue
# an arbitry number of unescaped UTF-8 bytes may follow.
buf = builder.build()
- return buf
+ return buf, first_escape_error_position
def isxdigit(ch):
diff --git a/pypy/module/__builtin__/test/test_compile.py b/pypy/module/__builtin__/test/test_compile.py
--- a/pypy/module/__builtin__/test/test_compile.py
+++ b/pypy/module/__builtin__/test/test_compile.py
@@ -39,6 +39,13 @@
except SyntaxError as e:
assert e.lineno == 1
+ def test_incorrect_escape_deprecation(self):
+ import warnings
+ with warnings.catch_warnings(record=True) as l:
+ warnings.simplefilter('always', category=DeprecationWarning)
+ compile(r"'\%c'" % 125, '', 'exec')
+ assert l == None
+
def test_unicode_encoding(self):
code = "# -*- coding: utf-8 -*-\npass\n"
compile(code, "tmp", "exec")
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1027,5 +1027,6 @@
def escape_decode(space, w_data, errors='strict'):
data = space.getarg_w('s#', w_data)
from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
- result = PyString_DecodeEscape(space, data, errors, None)
+ result, _ = PyString_DecodeEscape(space, data, errors, None)
+
return space.newtuple([space.newbytes(result), space.newint(len(data))])
More information about the pypy-commit
mailing list