[pypy-commit] pypy unicode-utf8: whack sre until it compiles
fijal
pypy.commits at gmail.com
Sun Dec 3 16:25:26 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r93268:831913dc603e
Date: 2017-12-03 22:14 +0100
http://bitbucket.org/pypy/pypy/changeset/831913dc603e/
Log: whack sre until it compiles
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -6,7 +6,7 @@
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
from pypy.interpreter.error import OperationError, oefmt
from rpython.rlib.rarithmetic import intmask
-from rpython.rlib import jit
+from rpython.rlib import jit, rutf8
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.rutf8 import Utf8StringBuilder
@@ -42,7 +42,9 @@
if isinstance(ctx, rsre_core.StrMatchContext):
return space.newbytes(ctx._string[start:end])
elif isinstance(ctx, rsre_core.UnicodeMatchContext):
- return space.newunicode(ctx._unicodestr[start:end])
+ s = ctx._unicodestr[start:end]
+ lgt, flag = rutf8.check_utf8(s, True)
+ return space.newutf8(s, lgt, flag)
else:
# unreachable
raise SystemError
@@ -110,7 +112,9 @@
if endpos < pos:
endpos = pos
if space.isinstance_w(w_string, space.w_unicode):
- unicodestr = space.unicode_w(w_string)
+ unicodestr = space.utf8_w(w_string)
+ # XXX will fail some tests, the length need to be adjusted for
+ # real char len etc
if pos > len(unicodestr):
pos = len(unicodestr)
if endpos > len(unicodestr):
@@ -341,7 +345,7 @@
unicodebuilder.get_flag()), n
else:
if space.isinstance_w(w_string, space.w_unicode):
- w_emptystr = space.newunicode(u'')
+ w_emptystr = space.newutf8('', 0, rutf8.FLAG_ASCII)
else:
w_emptystr = space.newbytes('')
w_item = space.call_method(w_emptystr, 'join',
@@ -575,7 +579,8 @@
elif isinstance(ctx, rsre_core.StrMatchContext):
return space.newbytes(ctx._string)
elif isinstance(ctx, rsre_core.UnicodeMatchContext):
- return space.newunicode(ctx._unicodestr)
+ lgt, flag = rutf8.check_utf8(ctx._unicodestr, True)
+ return space.newutf8(ctx._unicodestr, lgt, flag)
else:
raise SystemError
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -311,10 +311,10 @@
return res, flag
raise CheckError(~res)
-def get_utf8_length_flag(s):
+def get_utf8_length_flag(s, start=0, end=-1):
""" Get the length and flag out of valid utf8. For now just calls check_utf8
"""
- return check_utf8(s, True)
+ return check_utf8(s, True, start, end)
@jit.elidable
def _check_utf8(s, allow_surrogates, start, stop):
@@ -694,6 +694,12 @@
self._lgt += newlgt
self._flag = combine_flags(self._flag, newflag)
+ def append_slice(self, s, start, end):
+ self._s.append_slice(s, start, end)
+ newlgt, newflag = get_utf8_length_flag(s, start, end)
+ self._lgt += newlgt
+ self._flag = combine_flags(self._flag, newflag)
+
@signature(char(), returns=none())
def append_char(self, s):
# for characters, ascii
More information about the pypy-commit mailing list