[pypy-commit] pypy fix_test_codecs: Add a warning when decoding invalid unicode escape sequences.

Yusuke Tsutsumi pypy.commits at gmail.com
Tue May 29 02:08:29 EDT 2018


Author: Yusuke Tsutsumi <yusuke at tsutsumi.io>
Branch: fix_test_codecs
Changeset: r94698:2fd74c4c379d
Date: 2018-05-16 07:13 -0700
http://bitbucket.org/pypy/pypy/changeset/2fd74c4c379d/

Log:	Add a DeprecationWarning when decoding invalid unicode escape sequences.

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -117,7 +117,7 @@
     from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     unicodedata_handler = state.get_unicodedata_handler(space)
-    result, consumed = runicode.str_decode_unicode_escape(
+    result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
         string, len(string), "strict",
         final=True, errorhandler=decode_error_handler(space),
         unicodedata_handler=unicodedata_handler)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -949,11 +949,18 @@
 
     unicode_name_handler = state.get_unicodedata_handler(space)
 
-    result, consumed = runicode.str_decode_unicode_escape(
+    result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
         string, len(string), errors,
         final, state.decode_error_handler,
         unicode_name_handler)
 
+    if first_escape_error_char is not None:
+        space.warn(
+            space.newtext("invalid escape sequence '\\%s'"
+                          % str(first_escape_error_char)),
+            space.w_DeprecationWarning
+        )
+
     return space.newtuple([space.newunicode(result), space.newint(consumed)])
 
 # ____________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1435,10 +1435,11 @@
         errorhandler = default_unicode_error_decode
 
     if size == 0:
-        return u'', 0
+        return u'', 0, None
 
     builder = UnicodeBuilder(size)
     pos = 0
+    first_escape_error_char = None
     while pos < size:
         ch = s[pos]
 
@@ -1541,10 +1542,11 @@
                                         message, s, pos-1, look+1)
                 builder.append(res)
         else:
+            first_escape_error_char = unichr(ord(ch))
             builder.append(u'\\')
             builder.append(unichr(ord(ch)))
 
-    return builder.build(), pos
+    return builder.build(), pos, first_escape_error_char
 
 def make_unicode_escape_function(pass_printable=False, unicode_output=False,
                                  quotes=False, prefix=None):


More information about the pypy-commit mailing list