[pypy-commit] pypy fix_test_codecs: Adding warning with escaping invalid unicode escape characters.
Yusuke Tsutsumi
pypy.commits at gmail.com
Tue May 29 02:08:29 EDT 2018
Author: Yusuke Tsutsumi <yusuke at tsutsumi.io>
Branch: fix_test_codecs
Changeset: r94698:2fd74c4c379d
Date: 2018-05-16 07:13 -0700
http://bitbucket.org/pypy/pypy/changeset/2fd74c4c379d/
Log: Add a DeprecationWarning when decoding an invalid unicode escape sequence.
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -117,7 +117,7 @@
from pypy.module._codecs import interp_codecs
state = space.fromcache(interp_codecs.CodecState)
unicodedata_handler = state.get_unicodedata_handler(space)
- result, consumed = runicode.str_decode_unicode_escape(
+ result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
string, len(string), "strict",
final=True, errorhandler=decode_error_handler(space),
unicodedata_handler=unicodedata_handler)
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -949,11 +949,18 @@
unicode_name_handler = state.get_unicodedata_handler(space)
- result, consumed = runicode.str_decode_unicode_escape(
+ result, consumed, first_escape_error_char = runicode.str_decode_unicode_escape(
string, len(string), errors,
final, state.decode_error_handler,
unicode_name_handler)
+ if first_escape_error_char is not None:
+ space.warn(
+ space.newtext("invalid escape sequence '\\%s'"
+ % str(first_escape_error_char)),
+ space.w_DeprecationWarning
+ )
+
return space.newtuple([space.newunicode(result), space.newint(consumed)])
# ____________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1435,10 +1435,11 @@
errorhandler = default_unicode_error_decode
if size == 0:
- return u'', 0
+ return u'', 0, None
builder = UnicodeBuilder(size)
pos = 0
+ first_escape_error_char = None
while pos < size:
ch = s[pos]
@@ -1541,10 +1542,11 @@
message, s, pos-1, look+1)
builder.append(res)
else:
+ first_escape_error_char = unichr(ord(ch))
builder.append(u'\\')
builder.append(unichr(ord(ch)))
- return builder.build(), pos
+ return builder.build(), pos, first_escape_error_char
def make_unicode_escape_function(pass_printable=False, unicode_output=False,
quotes=False, prefix=None):
More information about the pypy-commit mailing list