[pypy-svn] r13753 - in pypy/dist/pypy/rpython: . test

Thu Jun 23 22:00:05 CEST 2005

Author: arigo
Date: Thu Jun 23 22:00:03 2005
New Revision: 13753

Modified:
   pypy/dist/pypy/rpython/llinterp.py
   pypy/dist/pypy/rpython/rdict.py
   pypy/dist/pypy/rpython/rstr.py
   pypy/dist/pypy/rpython/test/test_rdict.py
Log:
(arigo, arre, hpk) 

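- CPython-style resizing of rdict dictionaries: the table now tracks
  live items (num_items) separately from never-used entries
  (num_pristine_entries), and a resize may shrink the table when many
  entries are only deleted-entry markers.
- Added support for r_uint and intmask in llinterp: results of int
  operations that do not return a bool are now wrapped in intmask().
- Fixed the hash for rstr strings: the hashing loop now starts at
  index 0 instead of skipping the first character.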


Modified: pypy/dist/pypy/rpython/llinterp.py
==============================================================================

--- pypy/dist/pypy/rpython/llinterp.py	(original)
+++ pypy/dist/pypy/rpython/llinterp.py	Thu Jun 23 22:00:03 2005
@@ -265,6 +265,10 @@
 from pypy.objspace.flow.operation import FunctionByName
 opimpls = FunctionByName.copy()
 opimpls['is_true'] = bool
+ops_returning_a_bool = {'gt': True, 'ge': True,
+                        'lt': True, 'le': True,
+                        'eq': True, 'ne': True,
+                        'is_true': True}
 
 for typ in (float, int, r_uint):
     typname = typ.__name__
@@ -277,21 +281,29 @@
         optup += 'truediv', 'floordiv', 'and_', 'or_', 'lshift', 'rshift', 'xor'
     for opname in optup:
         assert opname in opimpls
+        if typ is int and opname not in ops_returning_a_bool:
+            adjust_result = 'intmask'
+        else:
+            adjust_result = ''
         pureopname = opname.rstrip('_')
         exec py.code.Source("""
             def %(opnameprefix)s_%(pureopname)s(x, y):
                 assert isinstance(x, %(typname)s)
                 assert isinstance(y, %(typname)s)
                 func = opimpls[%(opname)r]
-                return func(x, y)
+                return %(adjust_result)s(func(x, y))
         """ % locals()).compile()
     for opname in 'is_true', 'neg':
         assert opname in opimpls
+        if typ is int and opname not in ops_returning_a_bool:
+            adjust_result = 'intmask'
+        else:
+            adjust_result = ''
         exec py.code.Source("""
             def %(opnameprefix)s_%(opname)s(x):
                 assert isinstance(x, %(typname)s)
                 func = opimpls[%(opname)r]
-                return func(x)
+                return %(adjust_result)s(func(x))
         """ % locals()).compile()
 
 for opname in ('gt', 'lt', 'ge', 'ne', 'le', 'eq'):

Modified: pypy/dist/pypy/rpython/rdict.py
==============================================================================
--- pypy/dist/pypy/rpython/rdict.py	(original)
+++ pypy/dist/pypy/rpython/rdict.py	Thu Jun 23 22:00:03 2005
@@ -17,7 +17,8 @@
 #    }
 #    
 #    struct dicttable {
-#        int num_used_entries;
+#        int num_items;
+#        int num_pristine_entries;  # never used entries
 #        Array *entries; 
 #    }
 #
@@ -59,7 +60,8 @@
                                                         ('value', self.DICTVALUE))
             self.DICTENTRYARRAY = lltype.GcArray(self.DICTENTRY)
             self.STRDICT.become(lltype.GcStruct("dicttable", 
-                                ("num_used_entries", lltype.Signed), 
+                                ("num_items", lltype.Signed), 
+                                ("num_pristine_entries", lltype.Signed), 
                                 ("entries", lltype.Ptr(self.DICTENTRYARRAY))))
 
     #def convert_const(self, dictobj):
@@ -126,7 +128,7 @@
 deleted_entry_marker = lltype.malloc(STR, 0, immortal=True)
 
 def ll_strdict_len(d):
-    return d.num_used_entries 
+    return d.num_items 
 
 def ll_strdict_getitem(d, key): 
     entry = ll_strdict_lookup(d, key) 
@@ -137,13 +139,17 @@
 
 def ll_strdict_setitem(d, key, value): 
     entry = ll_strdict_lookup(d, key)
-    if not entry.key or entry.key == deleted_entry_marker: 
+    if not entry.key: 
+        entry.key = key 
+        entry.value = value 
+        d.num_items += 1
+        d.num_pristine_entries -= 1
+        if d.num_pristine_entries <= len(d.entries) / 3:
+            ll_strdict_resize(d)
+    elif entry.key == deleted_entry_marker: 
         entry.key = key 
         entry.value = value 
-        d.num_used_entries += 1
-        if d.num_used_entries / 2 > len(d.entries) / 3:
-            ll_strdict_resize(d, len(d.entries) * 2)
-            
+        d.num_items += 1
     else:
         entry.value = value 
 
@@ -152,13 +158,19 @@
     if not entry.key or entry.key == deleted_entry_marker: 
          raise KeyError
     entry.key = deleted_entry_marker
-    d.num_used_entries -= 1
+    d.num_items -= 1
     # XXX: entry.value  = ???
 
-def ll_strdict_resize(d, new_size):
+def ll_strdict_resize(d):
     old_entries = d.entries
     old_size = len(old_entries) 
+    # make a 'new_size' estimate and shrink it if there are many
+    # deleted entry markers
+    new_size = old_size * 2
+    while new_size >= 8 and d.num_items < new_size / 4:
+        new_size /= 2
     d.entries = lltype.malloc(lltype.typeOf(old_entries).TO, new_size)
+    d.num_pristine_entries = new_size - d.num_items
     i = 0
     while i < old_size:
         entry = old_entries[i]
@@ -207,11 +219,13 @@
 # ____________________________________________________________
 #
 #  Irregular operations.
+STRDICT_INITSIZE = 8
 
 def ll_newstrdict(DICTPTR):
     d = lltype.malloc(DICTPTR.TO)
-    d.entries = lltype.malloc(DICTPTR.TO.entries.TO, 8)  # everything is zeroed
-    d.num_used_entries = 0  # but still be explicit
+    d.entries = lltype.malloc(DICTPTR.TO.entries.TO, STRDICT_INITSIZE)
+    d.num_items = 0  # but still be explicit
+    d.num_pristine_entries = STRDICT_INITSIZE 
     return d
 
 def rtype_newdict(hop):
@@ -222,52 +236,3 @@
     v_result = hop.gendirectcall(ll_newstrdict, c1) 
     return v_result
 
-# ____________________________________________________________
-#
-#  Iteration.
-
-if 0: 
-    class ListIteratorRepr(Repr):
-
-        def __init__(self, r_list):
-            self.r_list = r_list
-            self.lowleveltype = lltype.Ptr(GcStruct('listiter',
-                                             ('list', r_list.lowleveltype),
-                                             ('index', Signed)))
-
-        def newiter(self, hop):
-            v_lst, = hop.inputargs(self.r_list)
-            citerptr = hop.inputconst(Void, self.lowleveltype)
-            return hop.gendirectcall(ll_listiter, citerptr, v_lst)
-
-        def rtype_next(self, hop):
-            v_iter, = hop.inputargs(self)
-            return hop.gendirectcall(ll_listnext, v_iter)
-
-    def ll_listiter(ITERPTR, lst):
-        iter = malloc(ITERPTR.TO)
-        iter.list = lst
-        iter.index = 0
-        return iter
-
-    def ll_listnext(iter):
-        l = iter.list
-        index = iter.index
-        if index >= len(l.items):
-            raise StopIteration
-        iter.index = index + 1
-        return l.items[index]
-
-
-    keyhash = rstr.ll_strhash(key) 
-    n = len(d.entries) 
-    index = keyhash & (n - 1)  
-    while 1: 
-        entry = d.entries[index]
-        if not entry.key or en: 
-            break 
-            if entry.key != deleted_entry_marker and rstr.ll_streq(entry.key, key): 
-                break 
-            index = (index + 1) & (n-1)
-    #return entry 
-

Modified: pypy/dist/pypy/rpython/rstr.py
==============================================================================
--- pypy/dist/pypy/rpython/rstr.py	(original)
+++ pypy/dist/pypy/rpython/rstr.py	Thu Jun 23 22:00:03 2005
@@ -365,7 +365,7 @@
             x = -1
         else:
             x = ord(s.chars[0]) << 7
-            i = 1
+            i = 0
             while i < length:
                 x = (1000003*x) ^ ord(s.chars[i])
                 i += 1

Modified: pypy/dist/pypy/rpython/test/test_rdict.py
==============================================================================
--- pypy/dist/pypy/rpython/test/test_rdict.py	(original)
+++ pypy/dist/pypy/rpython/test/test_rdict.py	Thu Jun 23 22:00:03 2005
@@ -77,16 +77,16 @@
         del d[c1]
         return d[c2]
 
+    char_by_hash = {}
     base = 8
-    x = 'a'
-    xh = lowlevelhash(x) % base
-    for y in range(ord('b'), ord('z')): 
-        if lowlevelhash(chr(y)) % base == xh: 
-            break 
-    else: 
-        py.test.skip("XXX improve hash finding algo") 
+    for y in range(0, 256):
+        y = chr(y)
+        y_hash = lowlevelhash(y) % base 
+        char_by_hash.setdefault(y_hash, []).append(y)
+
+    x, y = char_by_hash[0][:2]   # find a collision
        
-    res = interpret(func, [ord(x), y])
+    res = interpret(func, [ord(x), ord(y)])
     assert res == 2
 
     def func2(c1, c2): 
@@ -99,10 +99,29 @@
         d[c1] = 3
         return d 
 
-    res = interpret(func2, [ord(x), y])
+    res = interpret(func2, [ord(x), ord(y)])
     for i in range(len(res.entries)): 
         assert res.entries[i].key != rdict.deleted_entry_marker
 
+    def func3(c0, c1, c2, c3, c4, c5, c6, c7):
+        d = {}
+        c0 = chr(c0) ; d[c0] = 1; del d[c0]
+        c1 = chr(c1) ; d[c1] = 1; del d[c1]
+        c2 = chr(c2) ; d[c2] = 1; del d[c2]
+        c3 = chr(c3) ; d[c3] = 1; del d[c3]
+        c4 = chr(c4) ; d[c4] = 1; del d[c4]
+        c5 = chr(c5) ; d[c5] = 1; del d[c5]
+        c6 = chr(c6) ; d[c6] = 1; del d[c6]
+        c7 = chr(c7) ; d[c7] = 1; del d[c7]
+        return d
+
+    res = interpret(func3, [ord(char_by_hash[i][0]) for i in range(8)])
+    count_frees = 0
+    for i in range(len(res.entries)):
+        if not res.entries[i].key:
+            count_frees += 1
+    assert count_frees >= 3
+
 def test_dict_resize():
     def func():
         d = {}