[pypy-commit] pypy unicode-utf8-py3: merge heads

rlamy pypy.commits at gmail.com
Wed Jan 9 14:10:37 EST 2019


Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95596:a56140430aa7
Date: 2019-01-09 19:03 +0000
http://bitbucket.org/pypy/pypy/changeset/a56140430aa7/

Log:	merge heads

diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -8,7 +8,6 @@
 from rpython.rlib.rarithmetic import intmask
 from rpython.rlib import jit, rutf8
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib.runicode import unicode_encode_utf_8
 
 # ____________________________________________________________
 #
@@ -49,10 +48,7 @@
             lgt = rutf8.get_utf8_length(s)
             return space.newutf8(s, lgt)
         elif isinstance(ctx, rsre_core.UnicodeMatchContext):
-            uni = ctx._unicodestr[start:end]
-            uni_utf8 = unicode_encode_utf_8(uni, len(uni), 'strict',
-                                            allow_surrogates=True)
-            return space.newtext(uni_utf8, len(uni))
+            return space.newtext(ctx._unicodestr[start:end])
         else:
             # unreachable
             raise SystemError
@@ -356,6 +352,14 @@
         use_builder = '\x00'   # or 'S'tring or 'U'nicode/UTF8
         is_buffer = False
         filter_as_string = None
+        if space.isinstance_w(w_string, space.w_unicode):
+            if not self.is_known_unicode():
+                raise oefmt(space.w_TypeError,
+                    "cannot use a bytes pattern on a string-like object")
+        else:
+            if self.is_known_unicode():
+                raise oefmt(space.w_TypeError,
+                    "cannot use a string pattern on a bytes-like object")
         if space.is_true(space.callable(w_ptemplate)):
             w_filter = w_ptemplate
             filter_is_callable = True
@@ -365,6 +369,11 @@
                 literal = '\\' not in filter_as_string
                 if space.isinstance_w(w_string, space.w_unicode) and literal:
                     use_builder = 'U'
+            elif space.isinstance_w(w_ptemplate, space.w_bytes):
+                filter_as_string = space.bytes_w(w_ptemplate)
+                literal = '\\' not in filter_as_string
+                if space.isinstance_w(w_string, space.w_bytes) and literal:
+                    use_builder = 'S'
             else:
                 if space.isinstance_w(w_ptemplate, space.w_bytes):
                     filter_as_string = space.bytes_w(w_ptemplate)
@@ -468,7 +477,7 @@
                 raise AssertionError(use_builder)
         else:
             if space.isinstance_w(w_string, space.w_unicode):
-                w_emptystr = space.newtext('')
+                w_emptystr = space.newutf8('', 0)
             else:
                 w_emptystr = space.newbytes('')
             w_item = space.call_method(w_emptystr, 'join',


More information about the pypy-commit mailing list