[pypy-svn] r54580 - in pypy/branch/gc-tweak/pypy/rpython: lltypesystem memory memory/test

arigo at codespeak.net arigo at codespeak.net
Fri May 9 11:38:03 CEST 2008


Author: arigo
Date: Fri May  9 11:38:01 2008
New Revision: 54580

Added:
   pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py   (contents, props changed)
   pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py   (contents, props changed)
Modified:
   pypy/branch/gc-tweak/pypy/rpython/lltypesystem/rdict.py
   pypy/branch/gc-tweak/pypy/rpython/memory/support.py
   pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py
Log:
Kill this address tree and replace it with code that reuses
pypy.rpython.lltypesystem.rdict.  Looks much saner now (not to mention
more efficient).

The new lldict.py depends strongly on the internal details of rdict.py,
but at least, if we change rdict.py, test_lldict should cleanly fail.



Modified: pypy/branch/gc-tweak/pypy/rpython/lltypesystem/rdict.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/lltypesystem/rdict.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/lltypesystem/rdict.py	Fri May  9 11:38:01 2008
@@ -76,6 +76,8 @@
             # compute the shape of the DICTENTRY structure
             entryfields = []
             entrymeths = {
+                'allocate': lltype.typeMethod(_ll_malloc_entries),
+                'delete': _ll_free_entries,
                 'must_clear_key':   (isinstance(self.DICTKEY, lltype.Ptr)
                                      and self.DICTKEY._needsgc()),
                 'must_clear_value': (isinstance(self.DICTVALUE, lltype.Ptr)
@@ -189,6 +191,7 @@
                     }
             adtmeths['KEY']   = self.DICTKEY
             adtmeths['VALUE'] = self.DICTVALUE
+            adtmeths['allocate'] = lltype.typeMethod(_ll_malloc_dict)
             self.DICT.become(lltype.GcStruct("dicttable", adtmeths=adtmeths,
                                              *fields))
 
@@ -485,7 +488,7 @@
     new_size = old_size * 2
     while new_size > DICT_INITSIZE and d.num_items < new_size / 4:
         new_size /= 2
-    d.entries = lltype.malloc(lltype.typeOf(old_entries).TO, new_size, zero=True)
+    d.entries = lltype.typeOf(old_entries).TO.allocate(new_size)
     d.num_items = 0
     d.num_pristine_entries = new_size
     i = 0
@@ -495,6 +498,7 @@
             entry = old_entries[i]
             ll_dict_insertclean(d, entry.key, entry.value, hash)
         i += 1
+    old_entries.delete()
 
 # ------- a port of CPython's dictobject.c's lookdict implementation -------
 PERTURB_SHIFT = 5
@@ -582,8 +586,8 @@
 DICT_INITSIZE = 8
 
 def ll_newdict(DICT):
-    d = lltype.malloc(DICT)
-    d.entries = lltype.malloc(DICT.entries.TO, DICT_INITSIZE, zero=True)
+    d = DICT.allocate()
+    d.entries = DICT.entries.TO.allocate(DICT_INITSIZE)
     d.num_items = 0
     d.num_pristine_entries = DICT_INITSIZE
     return d
@@ -594,13 +598,23 @@
     n = DICT_INITSIZE
     while n < length_estimate:
         n *= 2
-    d = lltype.malloc(DICT)
-    d.entries = lltype.malloc(DICT.entries.TO, n, zero=True)
+    d = DICT.allocate()
+    d.entries = DICT.entries.TO.allocate(n)
     d.num_items = 0
     d.num_pristine_entries = DICT_INITSIZE
     return d
 ll_newdict_size.oopspec = 'newdict()'
 
+# pypy.rpython.memory.lldict uses a dict based on Struct and Array
+# instead of GcStruct and GcArray, which is done by using different
+# 'allocate' and 'delete' adtmethod implementations than the ones below
+def _ll_malloc_dict(DICT):
+    return lltype.malloc(DICT)
+def _ll_malloc_entries(ENTRIES, n):
+    return lltype.malloc(ENTRIES, n, zero=True)
+def _ll_free_entries(entries):
+    pass
+
 
 def rtype_r_dict(hop):
     r_dict = hop.r_result
@@ -691,8 +705,8 @@
 def ll_copy(dict):
     DICT = lltype.typeOf(dict).TO
     dictsize = len(dict.entries)
-    d = lltype.malloc(DICT)
-    d.entries = lltype.malloc(DICT.entries.TO, dictsize, zero=True)
+    d = DICT.allocate()
+    d.entries = DICT.entries.TO.allocate(dictsize)
     d.num_items = dict.num_items
     d.num_pristine_entries = dict.num_pristine_entries
     if hasattr(DICT, 'fnkeyeq'):   d.fnkeyeq   = dict.fnkeyeq
@@ -713,10 +727,11 @@
 def ll_clear(d):
     if len(d.entries) == d.num_pristine_entries == DICT_INITSIZE:
         return
-    DICT = lltype.typeOf(d).TO
-    d.entries = lltype.malloc(DICT.entries.TO, DICT_INITSIZE, zero=True)
+    old_entries = d.entries
+    d.entries = lltype.typeOf(old_entries).TO.allocate(DICT_INITSIZE)
     d.num_items = 0
     d.num_pristine_entries = DICT_INITSIZE
+    old_entries.delete()
 
 def ll_update(dic1, dic2):
     entries = dic2.entries

Added: pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py
==============================================================================
--- (empty file)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/lldict.py	Fri May  9 11:38:01 2008
@@ -0,0 +1,82 @@
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.lltypesystem import rdict
+from pypy.rlib.objectmodel import we_are_translated
+
+# This is a low-level AddressDict, reusing a lot of the logic from rdict.py.
+# xxx this is very dependent on the details of rdict.py
+
+alloc_count = 0     # for debugging
+
+def count_alloc(delta):
+    "NOT_RPYTHON"
+    global alloc_count
+    alloc_count += delta
+
+
+def newdict():
+    return rdict.ll_newdict(DICT)
+
+def dict_allocate():
+    if not we_are_translated(): count_alloc(+1)
+    return lltype.malloc(DICT, flavor="raw")
+
+def dict_delete(d):
+    dict_delete_entries(d.entries)
+    lltype.free(d, flavor="raw")
+    if not we_are_translated(): count_alloc(-1)
+
+def dict_allocate_entries(n):
+    if not we_are_translated(): count_alloc(+1)
+    # 'raw zero varsize malloc with length field' is not really implemented.
+    # we can initialize the memory to zero manually
+    entries = lltype.malloc(ENTRIES, n, flavor="raw")
+    i = 0
+    while i < n:
+        entries[i].key = llmemory.NULL
+        i += 1
+    return entries
+
+def dict_delete_entries(entries):
+    lltype.free(entries, flavor="raw")
+    if not we_are_translated(): count_alloc(-1)
+
+_hash = llmemory.cast_adr_to_int
+
+def dict_keyhash(d, key):
+    return _hash(key)
+
+def dict_entry_valid(entries, i):
+    return entries[i].key != llmemory.NULL
+
+def dict_entry_hash(entries, i):
+    return _hash(entries[i].key)
+
+def dict_get(d, key, default=llmemory.NULL):
+    return rdict.ll_get(d, key, default)
+
+def dict_add(d, key):
+    rdict.ll_dict_setitem(d, key, llmemory.NULL)
+
+ENTRY = lltype.Struct('ENTRY', ('key', llmemory.Address),
+                               ('value', llmemory.Address))
+ENTRIES = lltype.Array(ENTRY,
+                       adtmeths = {
+                           'allocate': dict_allocate_entries,
+                           'delete': dict_delete_entries,
+                           'valid': dict_entry_valid,
+                           'everused': dict_entry_valid,
+                           'hash': dict_entry_hash,
+                       })
+DICT = lltype.Struct('DICT', ('entries', lltype.Ptr(ENTRIES)),
+                             ('num_items', lltype.Signed),
+                             ('num_pristine_entries', lltype.Signed),
+                     adtmeths = {
+                         'allocate': dict_allocate,
+                         'delete': dict_delete,
+                         'contains': rdict.ll_contains,
+                         'setitem': rdict.ll_dict_setitem,
+                         'get': dict_get,
+                         'add': dict_add,
+                         'keyhash': dict_keyhash,
+                         'keyeq': None,
+                     })

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/support.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/support.py	Fri May  9 11:38:01 2008
@@ -127,12 +127,15 @@
 
         def stack2dict(self):
             result = AddressDict()
-            self.foreach(result.setitem, llmemory.NULL)
+            self.foreach(_add_in_dict, result)
             return result
 
     cache[chunk_size] = AddressStack
     return AddressStack
 
+def _add_in_dict(item, d):
+    d.add(item)
+
 
 def get_address_deque(chunk_size=DEFAULT_CHUNK_SIZE, cache={}):
     try:
@@ -200,10 +203,12 @@
     cache[chunk_size] = AddressDeque
     return AddressDeque
 
+# ____________________________________________________________
 
 def AddressDict():
     if we_are_translated():
-        return LLAddressDict()
+        from pypy.rpython.memory import lldict
+        return lldict.newdict()
     else:
         return BasicAddressDict()
 
@@ -223,128 +228,3 @@
         self.data[self._key(keyaddr)] = valueaddr
     def add(self, keyaddr):
         self.setitem(keyaddr, llmemory.NULL)
-
-# if diff_bit == -1, the node is a key/value pair (key=left/value=right)
-# if diff_bit >= 0, then the node is the root of a subtree where:
-#   * the keys have all exactly the same bits > diff_bit
-#   * the keys whose 'diff_bit' is 0 are in the 'left' subtree
-#   * the keys whose 'diff_bit' is 1 are in the 'right' subtree
-ADDRDICTNODE = lltype.Struct('AddrDictNode',
-                             ('diff_bit', lltype.Signed),
-                             ('left', llmemory.Address),
-                             ('right', llmemory.Address))
-
-class LLAddressDict(object):
-    _alloc_flavor_ = "raw"
-
-    def __init__(self):
-        self.root = lltype.malloc(ADDRDICTNODE, flavor='raw')
-        self.root.diff_bit = -1
-        self.root.left = llmemory.NULL
-
-    def delete(self):
-        node = self.root
-        parent = lltype.nullptr(ADDRDICTNODE)
-        while True:
-            if node.diff_bit >= 0:
-                next = _node_reveal(node.left)
-                node.left = _node_hide(parent)
-                parent = node
-                node = next
-            else:
-                lltype.free(node, flavor='raw')
-                if not parent:
-                    break
-                node = _node_reveal(parent.right)
-                grandparent = _node_reveal(parent.left)
-                lltype.free(parent, flavor='raw')
-                parent = grandparent
-        free_non_gc_object(self)
-
-    def contains(self, keyaddr):
-        if keyaddr:
-            node = self._lookup(keyaddr)
-            return keyaddr == node.left
-        else:
-            return False
-
-    def get(self, keyaddr, default=llmemory.NULL):
-        if keyaddr:
-            node = self._lookup(keyaddr)
-            if keyaddr == node.left:
-                return node.right
-        return default
-
-    def setitem(self, keyaddr, valueaddr):
-        ll_assert(bool(keyaddr), "cannot store NULL in an AddressDict")
-        node = self._lookup(keyaddr)
-        if node.left == llmemory.NULL or node.left == keyaddr:
-            node.left = keyaddr
-            node.right = valueaddr
-        else:
-            number1 = r_uint(llmemory.cast_adr_to_int(keyaddr))
-            number2 = r_uint(llmemory.cast_adr_to_int(node.left))
-            diff = number1 ^ number2
-            parentnode = self._lookup(keyaddr, difflimit = diff >> 1)
-            # all subnodes of parentnode have a key that is equal to
-            # 'keyaddr' for all bits in range(0, msb(diff)), and differs
-            # from 'keyaddr' exactly at bit msb(diff).
-            # At this point, parentnode.diff_bit < msb(diff).
-            nextbit = parentnode.diff_bit
-            copynode = lltype.malloc(ADDRDICTNODE, flavor='raw')
-            copynode.diff_bit = nextbit
-            copynode.left = parentnode.left
-            copynode.right = parentnode.right
-            bit = self._msb(diff, nextbit + 1)
-            newnode = lltype.malloc(ADDRDICTNODE, flavor='raw')
-            parentnode.diff_bit = bit
-            ll_assert(number1 & (r_uint(1) << bit) !=
-                      number2 & (r_uint(1) << bit), "setitem: bad 'bit'")
-            if number1 & (r_uint(1) << bit):
-                parentnode.left = _node_hide(copynode)
-                parentnode.right = _node_hide(newnode)
-            else:
-                parentnode.left = _node_hide(newnode)
-                parentnode.right = _node_hide(copynode)
-            newnode.diff_bit = -1
-            newnode.left = keyaddr
-            newnode.right = valueaddr
-        if not we_are_translated():
-            assert self.contains(keyaddr)
-
-    def add(self, keyaddr):
-        self.setitem(keyaddr, llmemory.NULL)
-
-    def _msb(self, value, lowerbound=0):
-        # Most Significant Bit: '(1<<result)' is the highest bit set in 'value'
-        ll_assert(value >= (r_uint(1) << lowerbound),
-                  "msb: bad value or lowerbound")
-        if value >= (r_uint(1) << (LONG_BIT-1)):
-            return LONG_BIT-1    # most significant possible bit
-        bit = lowerbound
-        while (value >> bit) > r_uint(1):
-            bit += 1
-        return bit
-
-    def _lookup(self, addr, difflimit=r_uint(0)):
-        # * with difflimit == 0, find and return the leaf node whose key is
-        #   equal to or closest from 'addr'.
-        # * with difflimit > 0, look for the node N closest to the root such
-        #   that all the keys of the subtree starting at node N are equal to
-        #   the given 'addr' at least for all bits > msb(difflimit).
-        number = r_uint(llmemory.cast_adr_to_int(addr))
-        node = self.root
-        while node.diff_bit >= 0:
-            mask = r_uint(1) << node.diff_bit
-            if mask <= difflimit:
-                return node
-            if number & mask:
-                node = _node_reveal(node.right)
-            else:
-                node = _node_reveal(node.left)
-        return node
-
-_node_hide = llmemory.cast_ptr_to_adr
-
-def _node_reveal(nodeaddr):
-    return llmemory.cast_adr_to_ptr(nodeaddr, lltype.Ptr(ADDRDICTNODE))

Added: pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py
==============================================================================
--- (empty file)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/test/test_lldict.py	Fri May  9 11:38:01 2008
@@ -0,0 +1,64 @@
+import random, sys
+from pypy.rpython.lltypesystem import lltype, llmemory
+from pypy.rpython.memory import lldict
+
+
+class TestLLAddressDict:
+
+    def setup_method(self, meth):
+        lldict.alloc_count = 0
+
+    def test_basics(self):
+        d = lldict.newdict()
+        d.add(intaddr(42))
+        d.setitem(intaddr(43), intaddr(44))
+        assert not d.contains(intaddr(41))
+        assert d.contains(intaddr(42))
+        assert d.contains(intaddr(43))
+        assert not d.contains(intaddr(44))
+        assert d.get(intaddr(41)) == llmemory.NULL
+        assert d.get(intaddr(42)) == llmemory.NULL
+        assert d.get(intaddr(43)) == intaddr(44)
+        assert d.get(intaddr(44)) == llmemory.NULL
+        d.delete()
+        assert lldict.alloc_count == 0
+
+    def test_random(self):
+        for i in range(8) + range(8, 80, 10):
+            examples = {}
+            lst = []
+            for j in range(i):
+                if j % 17 == 13:
+                    intval = random.choice(lst)
+                else:
+                    intval = random.randrange(-sys.maxint, sys.maxint) or 1
+                lst.append(intval)
+                examples[intval] = True
+
+            d = lldict.newdict()
+            for intval in lst:
+                d.setitem(intaddr(intval), intaddr(-intval))
+            for intval in lst:
+                assert d.contains(intaddr(intval))
+                assert d.get(intaddr(intval), "???").intval == -intval
+            for intval in lst:
+                for j in range(intval-5, intval+5):
+                    if j not in examples:
+                        assert not d.contains(intaddr(j))
+            assert not d.contains(llmemory.NULL)
+            d.delete()
+            assert lldict.alloc_count == 0
+
+
+class intaddr(object):
+    _TYPE = llmemory.Address
+    def __init__(self, intval):
+        self.intval = intval
+    def _cast_to_int(self):
+        return self.intval
+    def __repr__(self):
+        return '<intaddr 0x%x>' % (self.intval & (sys.maxint*2+1),)
+    def __eq__(self, other):
+        return isinstance(other, intaddr) and self.intval == other.intval
+    def __ne__(self, other):
+        return not self.__eq__(other)

Modified: pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py	(original)
+++ pypy/branch/gc-tweak/pypy/rpython/memory/test/test_support.py	Fri May  9 11:38:01 2008
@@ -1,8 +1,6 @@
-import sys
 from pypy.rlib.objectmodel import free_non_gc_object
 from pypy.rpython.memory.support import get_address_stack
 from pypy.rpython.memory.support import get_address_deque
-from pypy.rpython.memory.support import LLAddressDict
 
 from pypy.rpython.test.test_llinterp import interpret
 from pypy.rpython.lltypesystem import lltype, llmemory
@@ -141,44 +139,3 @@
     AddressStack = get_address_stack()
     res = interpret(f, [], malloc_check=False)
     assert res
-
-
-def test_LLAddressDict():
-    import random
-
-    class intaddr(object):
-        _TYPE = llmemory.Address
-        def __init__(self, intval):
-            self.intval = intval
-        def _cast_to_int(self):
-            return self.intval
-        def __repr__(self):
-            return '<intaddr 0x%x>' % (self.intval & (sys.maxint*2+1),)
-        def __eq__(self, other):
-            return isinstance(other, intaddr) and self.intval == other.intval
-        def __ne__(self, other):
-            return not self.__eq__(other)
-
-    for i in range(8) + range(8, 80, 10):
-        examples = {}
-        lst = []
-        for j in range(i):
-            if j % 17 == 13:
-                intval = random.choice(lst)
-            else:
-                intval = random.randrange(-sys.maxint, sys.maxint) or 1
-            lst.append(intval)
-            examples[intval] = True
-
-        d = LLAddressDict()
-        for intval in lst:
-            d.setitem(intaddr(intval), intaddr(-intval))
-        for intval in lst:
-            assert d.contains(intaddr(intval))
-            assert d.get(intaddr(intval), "???").intval == -intval
-        for intval in lst:
-            for j in range(intval-5, intval+5):
-                if j not in examples:
-                    assert not d.contains(intaddr(j))
-        assert not d.contains(llmemory.NULL)
-        d.delete()



More information about the Pypy-commit mailing list