[pypy-commit] pypy default: add shortcut to ensure that 'for c in uni' does not compute the index storage
cfbolz
pypy.commits at gmail.com
Sun Feb 17 07:40:37 EST 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch:
Changeset: r96035:637f18678c1c
Date: 2019-02-17 12:32 +0100
http://bitbucket.org/pypy/pypy/changeset/637f18678c1c/
Log: add shortcut to ensure that 'for c in uni' does not compute the
index storage
diff --git a/pypy/objspace/std/iterobject.py b/pypy/objspace/std/iterobject.py
--- a/pypy/objspace/std/iterobject.py
+++ b/pypy/objspace/std/iterobject.py
@@ -92,6 +92,33 @@
return w_item
+class W_FastUnicodeIterObject(W_AbstractSeqIterObject):
+ """Sequence iterator specialized for unicode objects."""
+
+ def __init__(self, w_seq):
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject
+ W_AbstractSeqIterObject.__init__(self, w_seq)
+ assert isinstance(w_seq, W_UnicodeObject)
+ self.byteindex = 0
+
+ def descr_next(self, space):
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject
+ from rpython.rlib import rutf8
+ w_seq = self.w_seq
+ if w_seq is None:
+ raise OperationError(space.w_StopIteration, space.w_None)
+ assert isinstance(w_seq, W_UnicodeObject)
+ index = self.index
+ if index == w_seq._length:
+ self.w_seq = None
+ raise OperationError(space.w_StopIteration, space.w_None)
+ start = self.byteindex
+ end = rutf8.next_codepoint_pos(w_seq._utf8, start)
+ w_res = W_UnicodeObject(w_seq._utf8[start:end], 1)
+ self.byteindex = end
+ return w_res
+
+
class W_FastTupleIterObject(W_AbstractSeqIterObject):
"""Sequence iterator specialized for tuples, accessing directly
their RPython-level list of wrapped objects.
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -22,6 +22,7 @@
from pypy.objspace.std.floatobject import W_FloatObject
from pypy.objspace.std.intobject import W_IntObject, setup_prebuilt, wrapint
from pypy.objspace.std.iterobject import W_AbstractSeqIterObject, W_SeqIterObject
+from pypy.objspace.std.iterobject import W_FastUnicodeIterObject
from pypy.objspace.std.listobject import W_ListObject
from pypy.objspace.std.longobject import W_LongObject, newlong
from pypy.objspace.std.memoryobject import W_MemoryView
@@ -339,6 +340,8 @@
return W_SliceObject(w_start, w_end, w_step)
def newseqiter(self, w_obj):
+ # Exact-type check: only objects whose type is W_UnicodeObject itself
+ # get the specialized iterator; everything else uses the generic one.
+ if type(w_obj) is W_UnicodeObject:
+ return W_FastUnicodeIterObject(w_obj)
return W_SeqIterObject(w_obj)
def newbuffer(self, obj):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -41,6 +41,18 @@
space.w_unicode, "__new__", space.w_unicode, w_uni)
assert w_new is w_uni
+ def test_fast_iter(self):
+ # Iterating over a unicode object must leave its _index_storage
+ # attribute untouched, and must yield the same items as indexing.
+ space = self.space
+ w_uni = space.newutf8(u"aä".encode("utf-8"), 2)
+ old_index_storage = w_uni._index_storage
+ w_iter = space.iter(w_uni)
+ w_char1 = w_iter.descr_next(space)
+ w_char2 = w_iter.descr_next(space)
+ # Checked before the _getitem_result calls below, which look items up
+ # by codepoint position and presumably may populate the storage.
+ assert w_uni._index_storage is old_index_storage
+ assert space.eq_w(w_char1, w_uni._getitem_result(space, 0))
+ assert space.eq_w(w_char2, w_uni._getitem_result(space, 1))
+
+
if HAS_HYPOTHESIS:
@given(strategies.text(), strategies.integers(min_value=0, max_value=10),
strategies.integers(min_value=-1, max_value=10))
More information about the pypy-commit
mailing list