[pypy-commit] pypy default: merge reorder-map-attributes:

cfbolz pypy.commits at gmail.com
Mon Feb 22 05:54:13 EST 2016


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: 
Changeset: r82386:572984f4ec26
Date: 2016-02-22 11:53 +0100
http://bitbucket.org/pypy/pypy/changeset/572984f4ec26/

Log:	merge reorder-map-attributes:

	When creating an instance with attributes in a different order than
	some previously existing ordering, switch to that ordering. This
	reduces the number of bridges in some situations.
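
	A minimal, hypothetical sketch of the effect at application level
	(class name and values are made up, not part of the commit):

	    class Point(object):
	        pass

	    p1 = Point()
	    p1.x = 1
	    p1.y = 2    # creates the map chain x -> y

	    p2 = Point()
	    p2.y = 3
	    p2.x = 4    # previously this grew a second chain y -> x; with this
	                # change p2 is switched over to the existing x -> y map,
	                # so traces touching both objects need fewer bridges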

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -128,6 +128,7 @@
 
 Fix SSL tests by importing cpython's patch
 
+
 .. branch: remove-getfield-pure
 
 Remove pure variants of ``getfield_gc_*`` operations from the JIT. Relevant
@@ -163,3 +164,10 @@
 .. branch: windows-vmprof-support
 
 vmprof should work on Windows.
+
+
+.. branch: reorder-map-attributes
+
+When creating instances and adding attributes in several different orders
+depending on some condition, the JIT would create too much code. This is now
+fixed.
\ No newline at end of file
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -1,4 +1,4 @@
-import weakref
+import weakref, sys
 
 from rpython.rlib import jit, objectmodel, debug, rerased
 from rpython.rlib.rarithmetic import intmask, r_uint
@@ -12,6 +12,11 @@
 from pypy.objspace.std.typeobject import MutableCell
 
 
+erase_item, unerase_item = rerased.new_erasing_pair("mapdict storage item")
+erase_map,  unerase_map = rerased.new_erasing_pair("map")
+erase_list, unerase_list = rerased.new_erasing_pair("mapdict storage list")
+
+
 # ____________________________________________________________
 # attribute shapes
 
@@ -20,6 +25,7 @@
 # note: we use "x * NUM_DIGITS_POW2" instead of "x << NUM_DIGITS" because
 # we want to propagate knowledge that the result cannot be negative
 
+
 class AbstractAttribute(object):
     _immutable_fields_ = ['terminator']
     cache_attrs = None
@@ -151,29 +157,124 @@
             cache[name, index] = attr
         return attr
 
+    @jit.elidable
+    def _get_cache_attr(self, name, index):
+        key = name, index
+        # this method is not actually elidable, but it's fine anyway
+        if self.cache_attrs is not None:
+            return self.cache_attrs.get(key, None)
+        return None
+
+    def add_attr(self, obj, name, index, w_value):
+        self._reorder_and_add(obj, name, index, w_value)
+        if not jit.we_are_jitted():
+            oldattr = self
+            attr = obj._get_mapdict_map()
+            size_est = (oldattr._size_estimate + attr.size_estimate()
+                                               - oldattr.size_estimate())
+            assert size_est >= (oldattr.length() * NUM_DIGITS_POW2)
+            oldattr._size_estimate = size_est
+
+    def _add_attr_without_reordering(self, obj, name, index, w_value):
+        attr = self._get_new_attr(name, index)
+        attr._switch_map_and_write_storage(obj, w_value)
+
+    @jit.unroll_safe
+    def _switch_map_and_write_storage(self, obj, w_value):
+        if self.length() > obj._mapdict_storage_length():
+            # note that self.size_estimate() is always at least self.length()
+            new_storage = [None] * self.size_estimate()
+            for i in range(obj._mapdict_storage_length()):
+                new_storage[i] = obj._mapdict_read_storage(i)
+            obj._set_mapdict_storage_and_map(new_storage, self)
+
+        # the order is important here: first change the map, then the storage,
+        # for the benefit of the special subclasses
+        obj._set_mapdict_map(self)
+        obj._mapdict_write_storage(self.storageindex, w_value)
+
+
+    @jit.elidable
+    def _find_branch_to_move_into(self, name, index):
+        # walk up the map chain to find an ancestor of lower order that
+        # already has an attribute with the current name inserted as a child
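+        # (an illustrative, hypothetical case: if the current map chain is
+        #  terminator -> b -> c and an earlier object already created the
+        #  branch terminator -> a, then asking for "a" from the c-map
+        #  returns the a-attribute with number_to_readd == 2, because "b"
+        #  and "c" have to be re-added on top of it)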
+        current_order = sys.maxint
+        number_to_readd = 0
+        current = self
+        key = (name, index)
+        while True:
+            attr = None
+            if current.cache_attrs is not None:
+                attr = current.cache_attrs.get(key, None)
+            if attr is None or attr.order > current_order:
+                # we reached the top, so we didn't find it anywhere,
+                # just add it to the top attribute
+                if not isinstance(current, PlainAttribute):
+                    return 0, self._get_new_attr(name, index)
+
+            else:
+                return number_to_readd, attr
+            # if not found try parent
+            number_to_readd += 1
+            current_order = current.order
+            current = current.back
+
     @jit.look_inside_iff(lambda self, obj, name, index, w_value:
             jit.isconstant(self) and
             jit.isconstant(name) and
             jit.isconstant(index))
-    def add_attr(self, obj, name, index, w_value):
-        attr = self._get_new_attr(name, index)
-        oldattr = obj._get_mapdict_map()
-        if not jit.we_are_jitted():
-            size_est = (oldattr._size_estimate + attr.size_estimate()
-                                               - oldattr.size_estimate())
-            assert size_est >= (oldattr.length() * NUM_DIGITS_POW2)
-            oldattr._size_estimate = size_est
-        if attr.length() > obj._mapdict_storage_length():
-            # note that attr.size_estimate() is always at least attr.length()
-            new_storage = [None] * attr.size_estimate()
-            for i in range(obj._mapdict_storage_length()):
-                new_storage[i] = obj._mapdict_read_storage(i)
-            obj._set_mapdict_storage_and_map(new_storage, attr)
+    def _reorder_and_add(self, obj, name, index, w_value):
+        # the idea is as follows: the subtrees of any map are ordered by
+        # insertion.  the invariant is that a subtree inserted later must not
+        # contain, anywhere within it, the attribute name of any subtree
+        # inserted earlier
+        #                              m______
+        #         inserted first      / \ ... \   further attributes
+        #           attrname a      0/  1\    n\
+        #                           m  a must not appear here anywhere
+        #
+        # when inserting a new attribute in an object we check whether any
+        # parent of lower order has seen that attribute yet. if yes, we follow
+        # that branch. if not, we normally append that attribute. When we
+        # follow a prior branch, we necessarily remove some attributes to be
+        # able to do that. They need to be re-added, which has to follow the
+        # reordering procedure recursively.
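+        #
+        # a small, hypothetical example: an earlier object created the chain
+        # a -> b, and the current object only has "b".  adding "a" to it
+        # finds "a" one map further up (number_to_readd == 1), so "b" and
+        # its value are stashed on the stack below, the object is switched
+        # to the a-map, and "b" is then re-added, ending on the shared
+        # a -> b map.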
 
-        # the order is important here: first change the map, then the storage,
-        # for the benefit of the special subclasses
-        obj._set_mapdict_map(attr)
-        obj._mapdict_write_storage(attr.storageindex, w_value)
+        # we store the to-be-re-added attributes on a stack, as pairs of map
+        # and value.  the stack is lazily initialized to a list large enough
+        # to store all current attributes
+        stack = None
+        stack_index = 0
+        while True:
+            current = self
+            number_to_readd = 0
+            number_to_readd, attr = self._find_branch_to_move_into(name, index)
+            # we found the attributes further up, need to save the
+            # previous values of the attributes we passed
+            if number_to_readd:
+                if stack is None:
+                    stack = [erase_map(None)] * (self.length() * 2)
+                current = self
+                for i in range(number_to_readd):
+                    assert isinstance(current, PlainAttribute)
+                    w_self_value = obj._mapdict_read_storage(
+                            current.storageindex)
+                    stack[stack_index] = erase_map(current)
+                    stack[stack_index + 1] = erase_item(w_self_value)
+                    stack_index += 2
+                    current = current.back
+            attr._switch_map_and_write_storage(obj, w_value)
+
+            if not stack_index:
+                return
+
+            # re-add the current top of the stack
+            stack_index -= 2
+            next_map = unerase_map(stack[stack_index])
+            w_value = unerase_item(stack[stack_index + 1])
+            name = next_map.name
+            index = next_map.index
+            self = obj._get_mapdict_map()
 
     def materialize_r_dict(self, space, obj, dict_w):
         raise NotImplementedError("abstract base class")
@@ -279,7 +380,7 @@
         return Terminator.set_terminator(self, obj, terminator)
 
 class PlainAttribute(AbstractAttribute):
-    _immutable_fields_ = ['name', 'index', 'storageindex', 'back', 'ever_mutated?']
+    _immutable_fields_ = ['name', 'index', 'storageindex', 'back', 'ever_mutated?', 'order']
 
     def __init__(self, name, index, back):
         AbstractAttribute.__init__(self, back.space, back.terminator)
@@ -289,6 +390,7 @@
         self.back = back
         self._size_estimate = self.length() * NUM_DIGITS_POW2
         self.ever_mutated = False
+        self.order = len(back.cache_attrs) if back.cache_attrs else 0
 
     def _copy_attr(self, obj, new_obj):
         w_value = self.read(obj, self.name, self.index)
@@ -542,9 +644,6 @@
 memo_get_subclass_of_correct_size._annspecialcase_ = "specialize:memo"
 _subclass_cache = {}
 
-erase_item, unerase_item = rerased.new_erasing_pair("mapdict storage item")
-erase_list, unerase_list = rerased.new_erasing_pair("mapdict storage list")
-
 def _make_subclass_size_n(supercls, n):
     from rpython.rlib import unroll
     rangen = unroll.unrolling_iterable(range(n))
diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py
--- a/pypy/objspace/std/test/test_mapdict.py
+++ b/pypy/objspace/std/test/test_mapdict.py
@@ -107,6 +107,153 @@
     assert obj2.getdictvalue(space, "b") == 60
     assert obj2.map is obj.map
 
+def test_insert_different_orders():
+    cls = Class()
+    obj = cls.instantiate()
+    obj.setdictvalue(space, "a", 10)
+    obj.setdictvalue(space, "b", 20)
+
+    obj2 = cls.instantiate()
+    obj2.setdictvalue(space, "b", 30)
+    obj2.setdictvalue(space, "a", 40)
+
+    assert obj.map is obj2.map
+
+def test_insert_different_orders_2():
+    cls = Class()
+    obj = cls.instantiate()
+    obj2 = cls.instantiate()
+
+    obj.setdictvalue(space, "a", 10)
+
+    obj2.setdictvalue(space, "b", 20)
+    obj2.setdictvalue(space, "a", 30)
+
+    obj.setdictvalue(space, "b", 40)
+    assert obj.map is obj2.map
+
+def test_insert_different_orders_3():
+    cls = Class()
+    obj = cls.instantiate()
+    obj2 = cls.instantiate()
+    obj3 = cls.instantiate()
+    obj4 = cls.instantiate()
+    obj5 = cls.instantiate()
+    obj6 = cls.instantiate()
+
+    obj.setdictvalue(space, "a", 10)
+    obj.setdictvalue(space, "b", 20)
+    obj.setdictvalue(space, "c", 30)
+
+    obj2.setdictvalue(space, "a", 30)
+    obj2.setdictvalue(space, "c", 40)
+    obj2.setdictvalue(space, "b", 50)
+
+    obj3.setdictvalue(space, "c", 30)
+    obj3.setdictvalue(space, "a", 40)
+    obj3.setdictvalue(space, "b", 50)
+
+    obj4.setdictvalue(space, "c", 30)
+    obj4.setdictvalue(space, "b", 40)
+    obj4.setdictvalue(space, "a", 50)
+
+    obj5.setdictvalue(space, "b", 30)
+    obj5.setdictvalue(space, "a", 40)
+    obj5.setdictvalue(space, "c", 50)
+
+    obj6.setdictvalue(space, "b", 30)
+    obj6.setdictvalue(space, "c", 40)
+    obj6.setdictvalue(space, "a", 50)
+
+    assert obj.map is obj2.map
+    assert obj.map is obj3.map
+    assert obj.map is obj4.map
+    assert obj.map is obj5.map
+    assert obj.map is obj6.map
+
+
+def test_insert_different_orders_4():
+    cls = Class()
+    obj = cls.instantiate()
+    obj2 = cls.instantiate()
+
+    obj.setdictvalue(space, "a", 10)
+    obj.setdictvalue(space, "b", 20)
+    obj.setdictvalue(space, "c", 30)
+    obj.setdictvalue(space, "d", 40)
+
+    obj2.setdictvalue(space, "d", 50)
+    obj2.setdictvalue(space, "c", 50)
+    obj2.setdictvalue(space, "b", 50)
+    obj2.setdictvalue(space, "a", 50)
+
+    assert obj.map is obj2.map
+
+def test_insert_different_orders_5():
+    cls = Class()
+    obj = cls.instantiate()
+    obj2 = cls.instantiate()
+
+    obj.setdictvalue(space, "a", 10)
+    obj.setdictvalue(space, "b", 20)
+    obj.setdictvalue(space, "c", 30)
+    obj.setdictvalue(space, "d", 40)
+
+    obj2.setdictvalue(space, "d", 50)
+    obj2.setdictvalue(space, "c", 50)
+    obj2.setdictvalue(space, "b", 50)
+    obj2.setdictvalue(space, "a", 50)
+
+    obj3 = cls.instantiate()
+    obj3.setdictvalue(space, "d", 50)
+    obj3.setdictvalue(space, "c", 50)
+    obj3.setdictvalue(space, "b", 50)
+    obj3.setdictvalue(space, "a", 50)
+
+    assert obj.map is obj3.map
+
+
+def test_bug_stack_overflow_insert_attributes():
+    cls = Class()
+    obj = cls.instantiate()
+
+    for i in range(1000):
+        obj.setdictvalue(space, str(i), i)
+
+
+def test_insert_different_orders_perm():
+    from itertools import permutations
+    cls = Class()
+    seen_maps = {}
+    for preexisting in ['', 'x', 'xy']:
+        for i, attributes in enumerate(permutations("abcdef")):
+            obj = cls.instantiate()
+            for i, attr in enumerate(preexisting):
+                obj.setdictvalue(space, attr, i*1000)
+            key = preexisting
+            for j, attr in enumerate(attributes):
+                obj.setdictvalue(space, attr, i*10+j)
+                key = "".join(sorted(key+attr))
+                if key in seen_maps:
+                    assert obj.map is seen_maps[key]
+                else:
+                    seen_maps[key] = obj.map
+
+    print len(seen_maps)
+
+
+def test_bug_infinite_loop():
+    cls = Class()
+    obj = cls.instantiate()
+    obj.setdictvalue(space, "e", 1)
+    obj2 = cls.instantiate()
+    obj2.setdictvalue(space, "f", 2)
+    obj3 = cls.instantiate()
+    obj3.setdictvalue(space, "a", 3)
+    obj3.setdictvalue(space, "e", 4)
+    obj3.setdictvalue(space, "f", 5)
+
+
 def test_attr_immutability(monkeypatch):
     cls = Class()
     obj = cls.instantiate()
@@ -359,9 +506,15 @@
 class TestMapDictImplementation(BaseTestRDictImplementation):
     StrategyClass = MapDictStrategy
     get_impl = get_impl
+    def test_setdefault_fast(self):
+        # mapdict can't pass this, which is fine
+        pass
 class TestDevolvedMapDictImplementation(BaseTestDevolvedDictImplementation):
     get_impl = get_impl
     StrategyClass = MapDictStrategy
+    def test_setdefault_fast(self):
+        # mapdict can't pass this, which is fine
+        pass
 
 # ___________________________________________________________
 # tests that check the obj interface after the dict has devolved
@@ -1132,3 +1285,7 @@
 class TestMapDictImplementationUsingnewdict(BaseTestRDictImplementation):
     StrategyClass = MapDictStrategy
     # NB: the get_impl method is not overwritten here, as opposed to above
+
+    def test_setdefault_fast(self):
+        # mapdict can't pass this, which is fine
+        pass

