[pypy-commit] pypy faster-set-of-iterator: experimenting with a general "unpack_into" interface. still unclear whether it's a good idea.

cfbolz noreply at buildbot.pypy.org
Mon Jul 15 11:49:35 CEST 2013


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: faster-set-of-iterator
Changeset: r65393:8219b71c6184
Date: 2013-07-09 01:34 +0200
http://bitbucket.org/pypy/pypy/changeset/8219b71c6184/

Log:	experimenting with a general "unpack_into" interface. still unclear
	whether it's a good idea.

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -4,8 +4,7 @@
 from rpython.tool.uid import HUGEVAL_BYTES
 from rpython.rlib import jit, types
 from rpython.rlib.debug import make_sure_not_resized
-from rpython.rlib.objectmodel import (we_are_translated, newlist_hint,
-     compute_unique_id)
+from rpython.rlib.objectmodel import (we_are_translated, compute_unique_id)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import r_uint
 
@@ -234,6 +233,18 @@
     def __spacebind__(self, space):
         return self
 
+    def unpack_into(self, space, unpack_target):
+        w_iterator = space.iter(self)
+        while True:
+            # YYY jit driver -- presumably a TODO: this loop has no merge point yet
+            try:
+                w_item = space.next(w_iterator)
+            except OperationError, e:
+                if not e.match(space, space.w_StopIteration):
+                    raise  # any error other than StopIteration propagates
+                break  # done: StopIteration ends the unpacking
+            unpack_target.append(w_item)
+
     def unwrap(self, space):
         """NOT_RPYTHON"""
         # _____ this code is here to support testing only _____
@@ -751,21 +762,20 @@
         """Unpack an iterable into a real (interpreter-level) list.
 
         Raise an OperationError(w_ValueError) if the length is wrong."""
+        from pypy.interpreter import unpack
         w_iterator = self.iter(w_iterable)
         if expected_length == -1:
-            # xxx special hack for speed
-            from pypy.interpreter.generator import GeneratorIterator
-            if isinstance(w_iterator, GeneratorIterator):
-                lst_w = []
-                w_iterator.unpack_into(lst_w)
-                return lst_w
-            # /xxx
-            return self._unpackiterable_unknown_length(w_iterator, w_iterable)
+            unpack_target = unpack.InterpListUnpackTarget(self, w_iterable)
+            self.unpack_into(w_iterable, unpack_target)
+            return unpack_target.items_w
         else:
             lst_w = self._unpackiterable_known_length(w_iterator,
                                                       expected_length)
             return lst_w[:]     # make the resulting list resizable
 
+    def unpack_into(self, w_iterable, unpack_target):
+        w_iterable.unpack_into(self, unpack_target)  # object feeds the target
+
     def iteriterable(self, w_iterable):
         return W_InterpIterable(self, w_iterable)
 
@@ -773,26 +783,6 @@
         """Unpack an iterable of unknown length into an interp-level
         list.
         """
-        # If we can guess the expected length we can preallocate.
-        try:
-            items = newlist_hint(self.length_hint(w_iterable, 0))
-        except MemoryError:
-            items = [] # it might have lied
-
-        tp = self.type(w_iterator)
-        while True:
-            unpackiterable_driver.jit_merge_point(tp=tp,
-                                                  w_iterator=w_iterator,
-                                                  items=items)
-            try:
-                w_item = self.next(w_iterator)
-            except OperationError, e:
-                if not e.match(self, self.w_StopIteration):
-                    raise
-                break  # done
-            items.append(w_item)
-        #
-        return items
 
     @jit.dont_look_inside
     def _unpackiterable_known_length(self, w_iterator, expected_length):
@@ -805,21 +795,10 @@
     @jit.unroll_safe
     def _unpackiterable_known_length_jitlook(self, w_iterator,
                                              expected_length):
-        items = [None] * expected_length
-        idx = 0
-        while True:
-            try:
-                w_item = self.next(w_iterator)
-            except OperationError, e:
-                if not e.match(self, self.w_StopIteration):
-                    raise
-                break  # done
-            if idx == expected_length:
-                raise OperationError(self.w_ValueError,
-                                    self.wrap("too many values to unpack"))
-            items[idx] = w_item
-            idx += 1
-        if idx < expected_length:
+        from pypy.interpreter import unpack
+        unpack_target = unpack.FixedSizeUnpackTarget(self, w_iterable)
+        self.unpack_into(unpack_target)
+        if unpack_target.index < expected_length:
             if idx == 1:
                 plural = ""
             else:
@@ -827,7 +806,7 @@
             raise operationerrfmt(self.w_ValueError,
                                   "need more than %d value%s to unpack",
                                   idx, plural)
-        return items
+        return unpack_target.items_w
 
     def unpackiterable_unroll(self, w_iterable, expected_length):
         # Like unpackiterable(), but for the cases where we have
@@ -835,6 +814,7 @@
         # Returns a fixed-size list.
         w_iterator = self.iter(w_iterable)
         assert expected_length != -1
+        # YYY correct unrolling
         return self._unpackiterable_known_length_jitlook(w_iterator,
                                                          expected_length)
 
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -177,7 +177,7 @@
                                   reds=['self', 'frame', 'results'],
                                   name='unpack_into')
 
-        def unpack_into(self, results):
+        def unpack_into(self, space, results):
             """This is a hack for performance: runs the generator and collects
             all produced items in a list."""
             # XXX copied and simplified version of send_ex()
diff --git a/pypy/interpreter/unpack.py b/pypy/interpreter/unpack.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/unpack.py
@@ -0,0 +1,54 @@
+from pypy.interpreter.error import OperationError
+from rpython.rlib.objectmodel import newlist_hint
+
+class UnpackTarget(object):
+    """Abstract receiver for the items produced by space.unpack_into()."""
+
+    def __init__(self, space):
+        self.space = space
+
+    def append(self, w_obj):
+        """Accept the next unpacked item; subclasses must override."""
+        raise NotImplementedError("abstract base class")
+
+
+class InterpListUnpackTarget(UnpackTarget):
+    """Collects the items into a resizable interp-level list, items_w."""
+
+    def __init__(self, space, w_iterable):
+        self.space = space
+        # If we can guess the expected length we can preallocate.
+        try:
+            items_w = newlist_hint(self.space.length_hint(w_iterable, 0))
+        except MemoryError:
+            items_w = [] # it might have lied
+        self.items_w = items_w
+
+    def append(self, w_obj):
+        self.items_w.append(w_obj)
+
+
+class FixedSizeUnpackTarget(UnpackTarget):
+    """Fills a preallocated list of expected_size slots, in order.
+
+    self.index is the number of slots filled so far; callers compare it
+    with the expected size to detect an iterable that was too short.
+    """
+
+    def __init__(self, space, expected_size):
+        # append() needs the space to build the app-level ValueError
+        UnpackTarget.__init__(self, space)
+        self.items_w = [None] * expected_size
+        self.index = 0
+
+    def append(self, w_obj):
+        """Store w_obj in the next free slot.
+
+        Raises OperationError(w_ValueError) if all slots are already full.
+        """
+        if self.index == len(self.items_w):
+            space = self.space
+            raise OperationError(space.w_ValueError,
+                                 space.wrap("too many values to unpack"))
+        self.items_w[self.index] = w_obj
+        self.index += 1
diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py
--- a/pypy/objspace/std/listobject.py
+++ b/pypy/objspace/std/listobject.py
@@ -14,7 +14,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import (WrappedDefault, unwrap_spec, applevel,
     interp2app)
-from pypy.interpreter.generator import GeneratorIterator
+from pypy.interpreter import unpack
 from pypy.interpreter.signature import Signature
 from pypy.objspace.std import slicetype
 from pypy.objspace.std.floatobject import W_FloatObject
@@ -100,36 +100,13 @@
 
     return space.fromcache(ObjectListStrategy)
 
+class ListUnpackTarget(unpack.UnpackTarget):
+    def __init__(self, space, w_list):
+        self.space = space
+        self.w_list = w_list
 
-def _get_printable_location(w_type):
-    return ('list__do_extend_from_iterable [w_type=%s]' %
-            w_type.getname(w_type.space))
-
-
-_do_extend_jitdriver = jit.JitDriver(
-    name='list__do_extend_from_iterable',
-    greens=['w_type'],
-    reds=['i', 'w_iterator', 'w_list'],
-    get_printable_location=_get_printable_location)
-
-def _do_extend_from_iterable(space, w_list, w_iterable):
-    w_iterator = space.iter(w_iterable)
-    w_type = space.type(w_iterator)
-    i = 0
-    while True:
-        _do_extend_jitdriver.jit_merge_point(w_type=w_type,
-                                             i=i,
-                                             w_iterator=w_iterator,
-                                             w_list=w_list)
-        try:
-            w_list.append(space.next(w_iterator))
-        except OperationError, e:
-            if not e.match(space, space.w_StopIteration):
-                raise
-            break
-        i += 1
-    return i
-
+    def append(self, w_obj):
+        self.w_list.append(w_obj)
 
 def list_unroll_condition(w_list1, space, w_list2):
     return (jit.loop_unrolling_heuristic(w_list1, w_list1.length(),
@@ -790,10 +767,8 @@
         if type(w_any) is W_ListObject or (isinstance(w_any, W_ListObject) and
                                            self.space._uses_list_iter(w_any)):
             self._extend_from_list(w_list, w_any)
-        elif isinstance(w_any, GeneratorIterator):
-            w_any.unpack_into_w(w_list)
-        else:
-            self._extend_from_iterable(w_list, w_any)
+            return
+        self._extend_from_iterable(w_list, w_any)
 
     def _extend_from_list(self, w_list, w_other):
         raise NotImplementedError
@@ -804,11 +779,12 @@
         if length_hint:
             w_list._resize_hint(w_list.length() + length_hint)
 
-        extended = _do_extend_from_iterable(self.space, w_list, w_iterable)
+        self.space.unpack_into(w_iterable, ListUnpackTarget(self.space, w_list))
 
         # cut back if the length hint was too large
-        if extended < length_hint:
-            w_list._resize_hint(w_list.length())
+        length = w_list.length()
+        if length < length_hint:
+            w_list._resize_hint(length)
 
     def reverse(self, w_list):
         raise NotImplementedError
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -1,4 +1,4 @@
-from pypy.interpreter import gateway
+from pypy.interpreter import gateway, unpack
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.signature import Signature
 from pypy.interpreter.baseobjspace import W_Root
@@ -1568,56 +1568,19 @@
         w_set.sstorage = strategy.get_storage_from_unwrapped_list(intlist)
         return
 
-    iterable_w = space.listview(w_iterable)
+    w_set.strategy = strategy = space.fromcache(EmptySetStrategy)
+    w_set.sstorage = strategy.get_empty_storage()
+    space.unpack_into(w_iterable, SetUnpackTarget(space, w_set))
 
-    if len(iterable_w) == 0:
-        w_set.strategy = strategy = space.fromcache(EmptySetStrategy)
-        w_set.sstorage = strategy.get_empty_storage()
-        return
+class SetUnpackTarget(unpack.UnpackTarget):
+    # YYY is unrolling correctly done?
+    def __init__(self, space, w_set):
+        self.space = space
+        self.w_set = w_set
 
-    _pick_correct_strategy(space, w_set, iterable_w)
+    def append(self, w_obj):
+        self.w_set.add(w_obj)
 
- at jit.look_inside_iff(lambda space, w_set, iterable_w:
-        jit.loop_unrolling_heuristic(iterable_w, len(iterable_w), UNROLL_CUTOFF))
-def _pick_correct_strategy(space, w_set, iterable_w):
-    # check for integers
-    for w_item in iterable_w:
-        if type(w_item) is not W_IntObject:
-            break
-    else:
-        w_set.strategy = space.fromcache(IntegerSetStrategy)
-        w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w)
-        return
-
-    # check for strings
-    for w_item in iterable_w:
-        if type(w_item) is not W_StringObject:
-            break
-    else:
-        w_set.strategy = space.fromcache(StringSetStrategy)
-        w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w)
-        return
-
-    # check for unicode
-    for w_item in iterable_w:
-        if type(w_item) is not W_UnicodeObject:
-            break
-    else:
-        w_set.strategy = space.fromcache(UnicodeSetStrategy)
-        w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w)
-        return
-
-    # check for compares by identity
-    for w_item in iterable_w:
-        if not space.type(w_item).compares_by_identity():
-            break
-    else:
-        w_set.strategy = space.fromcache(IdentitySetStrategy)
-        w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w)
-        return
-
-    w_set.strategy = space.fromcache(ObjectSetStrategy)
-    w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w)
 
 init_signature = Signature(['some_iterable'], None, None)
 init_defaults = [None]


More information about the pypy-commit mailing list