[pypy-commit] pypy py3.6: (tomek.dziopa ronan) add deprecationwarning for incorrectly escaped strings

Sat Apr 21 20:18:04 EDT 2018

Author: Tomasz Dziopa <tomek.dziopa at gmail.com>
Branch: py3.6
Changeset: r94408:bce4e51f4d12
Date: 2018-04-21 22:48 +0100
http://bitbucket.org/pypy/pypy/changeset/bce4e51f4d12/

Log:	(tomek.dziopa ronan) add deprecationwarning for incorrectly escaped
	strings

	CPython changeset https://hg.python.org/cpython/rev/ee82266ad35b
	added a DeprecationWarning for invalid escape sequences in string
	literals. This change makes PyPy behave the same way.

diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -114,7 +114,13 @@
             v = unicodehelper.decode_utf8(space, substr)
             return space.newunicode(v)
 
-    v = PyString_DecodeEscape(space, substr, 'strict', encoding)
+    v, first_escape_error_position = PyString_DecodeEscape(
+        space, substr, 'strict', encoding)
+
+    if first_escape_error_position is not None:
+        space.warn("invalid excape sequence '\\%c'" % ch,
+            space.w_DeprecationWarning)
+
     return space.newbytes(v)
 
 def decode_unicode_utf8(space, s, ps, q):
@@ -158,6 +164,7 @@
     builder = StringBuilder(len(s))
     ps = 0
     end = len(s)
+    first_escape_error_position = None
     while ps < end:
         if s[ps] != '\\':
             # note that the C code has a label here.
@@ -237,11 +244,13 @@
             builder.append('\\')
             ps -= 1
             assert ps >= 0
+            if first_escape_error_position is None:
+                first_escape_error_position = ps
             continue
             # an arbitry number of unescaped UTF-8 bytes may follow.
 
     buf = builder.build()
-    return buf
+    return buf, first_escape_error_position
 
 
 def isxdigit(ch):
diff --git a/pypy/module/__builtin__/test/test_compile.py b/pypy/module/__builtin__/test/test_compile.py
--- a/pypy/module/__builtin__/test/test_compile.py
+++ b/pypy/module/__builtin__/test/test_compile.py
@@ -39,6 +39,13 @@
         except SyntaxError as e:
             assert e.lineno == 1
 
+    def test_incorrect_escape_deprecation(self):
+        import warnings
+        with warnings.catch_warnings(record=True) as l:
+            warnings.simplefilter('always', category=DeprecationWarning)
+            compile(r"'\%c'" % 125, '', 'exec')
+        assert l == None
+
     def test_unicode_encoding(self):
         code = "# -*- coding: utf-8 -*-\npass\n"
         compile(code, "tmp", "exec")
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1027,5 +1027,6 @@
 def escape_decode(space, w_data, errors='strict'):
     data = space.getarg_w('s#', w_data)
     from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
-    result = PyString_DecodeEscape(space, data, errors, None)
+    result, _ = PyString_DecodeEscape(space, data, errors, None)
+
     return space.newtuple([space.newbytes(result), space.newint(len(data))])