[pypy-commit] pypy py3.5: Finally found and fixed the UnicodeDecodeError escaping in RPython

arigo pypy.commits at gmail.com
Mon Aug 29 06:17:04 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r86681:7528d42874ea
Date: 2016-08-29 12:16 +0200
http://bitbucket.org/pypy/pypy/changeset/7528d42874ea/

Log:	Finally found and fixed the UnicodeDecodeError escaping in RPython

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -838,11 +838,16 @@
         return w_s1
 
     def get_interned_str(self, s):
-        """Assumes an identifier (utf-8 encoded str)"""
+        """Assumes an identifier (utf-8 encoded str).  Returns None if
+        the identifier is not interned, or not a valid utf-8 string at all.
+        """
         # interface for marshal_impl
         if not we_are_translated():
             assert type(s) is str
-        u = s.decode('utf-8')
+        try:
+            u = s.decode('utf-8')
+        except UnicodeDecodeError:
+            return None
         return self.interned_strings.get(u)   # may be None
 
     def descr_self_interp_w(self, RequiredClass, w_obj):
diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -459,7 +459,8 @@
 
 @unmarshaller(TYPE_INTERNED)
 def unmarshal_bytes(space, u, tc):
-    return space.new_interned_str(u.get_str())
+    w_u = unmarshal_unicode(space, u, tc)
+    return u.space.new_interned_w_str(w_u)
 
 def _unmarshal_ascii(u, short_length, interned):
     if short_length:


More information about the pypy-commit mailing list