[pypy-commit] pypy default: add shortcut to ensure that 'for c in uni' does not compute the index storage

Sun Feb 17 07:40:37 EST 2019

Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: 
Changeset: r96035:637f18678c1c
Date: 2019-02-17 12:32 +0100
http://bitbucket.org/pypy/pypy/changeset/637f18678c1c/

Log:	add shortcut to ensure that 'for c in uni' does not compute the
	index storage

diff --git a/pypy/objspace/std/iterobject.py b/pypy/objspace/std/iterobject.py
--- a/pypy/objspace/std/iterobject.py
+++ b/pypy/objspace/std/iterobject.py
@@ -92,6 +92,33 @@
         return w_item
 
 
+class W_FastUnicodeIterObject(W_AbstractSeqIterObject):
+    """Sequence iterator specialized for unicode objects."""
+
+    def __init__(self, w_seq):
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        W_AbstractSeqIterObject.__init__(self, w_seq)
+        assert isinstance(w_seq, W_UnicodeObject)
+        self.byteindex = 0  # offset in w_seq._utf8 of the next code point
+
+    def descr_next(self, space):
+        """Return the next code point of the string as a W_UnicodeObject."""
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        from rpython.rlib import rutf8
+        w_seq = self.w_seq
+        if w_seq is None:
+            raise OperationError(space.w_StopIteration, space.w_None)
+        assert isinstance(w_seq, W_UnicodeObject)
+        if self.index == w_seq._length:  # every code point was returned
+            self.w_seq = None
+            raise OperationError(space.w_StopIteration, space.w_None)
+        start = self.byteindex
+        end = rutf8.next_codepoint_pos(w_seq._utf8, start)
+        self.byteindex = end
+        self.index += 1  # keep in step with byteindex for the end check
+        return W_UnicodeObject(w_seq._utf8[start:end], 1)
+
+
 class W_FastTupleIterObject(W_AbstractSeqIterObject):
     """Sequence iterator specialized for tuples, accessing directly
     their RPython-level list of wrapped objects.
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -22,6 +22,7 @@
 from pypy.objspace.std.floatobject import W_FloatObject
 from pypy.objspace.std.intobject import W_IntObject, setup_prebuilt, wrapint
 from pypy.objspace.std.iterobject import W_AbstractSeqIterObject, W_SeqIterObject
+from pypy.objspace.std.iterobject import W_FastUnicodeIterObject
 from pypy.objspace.std.listobject import W_ListObject
 from pypy.objspace.std.longobject import W_LongObject, newlong
 from pypy.objspace.std.memoryobject import W_MemoryView
@@ -339,6 +340,8 @@
         return W_SliceObject(w_start, w_end, w_step)
 
     def newseqiter(self, w_obj):
+        if type(w_obj) is W_UnicodeObject:  # exact type only, not subclasses
+            return W_FastUnicodeIterObject(w_obj)
         return W_SeqIterObject(w_obj)
 
     def newbuffer(self, obj):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -41,6 +41,18 @@
                 space.w_unicode, "__new__", space.w_unicode, w_uni)
         assert w_new is w_uni
 
+    def test_fast_iter(self):
+        """Iterating a unicode object must leave _index_storage untouched."""
+        space = self.space
+        w_text = space.newutf8(u"aä".encode("utf-8"), 2)
+        storage_before = w_text._index_storage
+        w_it = space.iter(w_text)
+        w_chars = [w_it.descr_next(space), w_it.descr_next(space)]
+        assert w_text._index_storage is storage_before
+        for pos, w_char in enumerate(w_chars):
+            assert space.eq_w(w_char, w_text._getitem_result(space, pos))
+
+
     if HAS_HYPOTHESIS:
         @given(strategies.text(), strategies.integers(min_value=0, max_value=10),
                                   strategies.integers(min_value=-1, max_value=10))