[pypy-commit] pypy py3.6: Fix. The general issue is space.newtext("string") where the

arigo pypy.commits at gmail.com
Thu Apr 18 06:03:54 EDT 2019


Author: Armin Rigo <arigo at tunes.org>
Branch: py3.6
Changeset: r96520:4a70f02715ab
Date: 2019-04-18 12:03 +0200
http://bitbucket.org/pypy/pypy/changeset/4a70f02715ab/

Log:	Fix. The general issue is space.newtext("string") where the string
	might contain some random byte-chars from app-level.

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -958,9 +958,17 @@
         unicode_name_handler)
 
     if first_escape_error_char is not None:
+        # Here, 'first_escape_error_char' is a single string character.
+        # Careful, it might be >= '\x80'.  If it is, it would make an
+        # invalid utf-8 string when pasted directly into the message.
+        if ' ' <= first_escape_error_char < '\x7f':
+            msg = "invalid escape sequence '\\%s'" % (first_escape_error_char,)
+        else:
+            msg = "invalid escape sequence: '\\' followed by %s" % (
+                space.text_w(space.repr(
+                    space.newbytes(first_escape_error_char))),)
         space.warn(
-            space.newtext("invalid escape sequence '\\%s'"
-                          % str(first_escape_error_char)),
+            space.newtext(msg),
             space.w_DeprecationWarning
         )
     return space.newtuple([space.newutf8(result, lgt), space.newint(u_len)])
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1394,10 +1394,11 @@
         with warnings.catch_warnings(record=True) as l:
             warnings.simplefilter("always")
             codecs.unicode_escape_decode(b'\\A')
-            codecs.unicode_escape_decode(b"\\A")
+            codecs.unicode_escape_decode(b"\\" + b"\xff")
 
         assert len(l) == 2
         assert isinstance(l[0].message, DeprecationWarning)
+        assert isinstance(l[1].message, DeprecationWarning)
 
     def test_invalid_type_errors(self):
         # hex is not a text encoding. it works via the codecs functions, but


More information about the pypy-commit mailing list