[pypy-svn] r67669 - in pypy/trunk/pypy/objspace/std: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Sep 14 12:01:33 CEST 2009


Author: cfbolz
Date: Mon Sep 14 12:01:33 2009
New Revision: 67669

Added:
   pypy/trunk/pypy/objspace/std/sharingdict.py   (contents, props changed)
   pypy/trunk/pypy/objspace/std/test/test_sharingdict.py   (contents, props changed)
Modified:
   pypy/trunk/pypy/objspace/std/dictmultiobject.py
Log:
Do a bit of counting to predict the final size of a sharing dictionary. This makes it
possible to use a non-resizable list in its implementation, which reduces the
number of indirections and also should make instance creation a bit more
JIT-friendly.


Modified: pypy/trunk/pypy/objspace/std/dictmultiobject.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/dictmultiobject.py	(original)
+++ pypy/trunk/pypy/objspace/std/dictmultiobject.py	Mon Sep 14 12:01:33 2009
@@ -4,8 +4,6 @@
 from pypy.module.__builtin__.__init__ import BUILTIN_TO_INDEX, OPTIMIZED_BUILTINS
 
 from pypy.rlib.objectmodel import r_dict, we_are_translated
-from pypy.rlib.jit import purefunction
-from pypy.rlib.rweakref import RWeakValueDictionary
 
 def _is_str(space, w_key):
     return space.is_w(space.type(w_key), space.w_str)
@@ -686,190 +684,6 @@
             return None
 
 
-class SharedStructure(object):
-    def __init__(self, keys=None, length=0,
-                 last_key=None,
-                 back_struct=None):
-        if keys is None:
-            keys = {}
-        self.keys = keys
-        self.length = length
-        self.back_struct = back_struct
-        other_structs = RWeakValueDictionary(SharedStructure)
-        self.other_structs = other_structs
-        self.last_key = last_key
-        if last_key is not None:
-            assert back_struct is not None
-
-    def new_structure(self, added_key):
-        keys = self.keys.copy()
-        keys[added_key] = len(self.keys)
-        new_structure = SharedStructure(keys, self.length + 1,
-                                        added_key, self)
-        self.other_structs.set(added_key, new_structure)
-        return new_structure
-
-    def lookup_position(self, key):
-        # jit helper
-        self = hint(self, promote=True)
-        key = hint(key, promote=True)
-        return _lookup_position_shared(self, key)
-
-    def get_next_structure(self, key):
-        # jit helper
-        self = hint(self, promote=True)
-        key = hint(key, promote=True)
-        return _get_next_structure_shared(self, key)
-
-@purefunction
-def _lookup_position_shared(self, key):
-    return self.keys.get(key, -1)
-
-@purefunction
-def _get_next_structure_shared(self, key):
-    new_structure = self.other_structs.get(key)
-    if new_structure is None:
-        new_structure = self.new_structure(key)
-    return new_structure
-
-class State(object):
-    def __init__(self, space):
-        self.empty_structure = SharedStructure()
-
-
-class SharedDictImplementation(DictImplementation):
-
-    def __init__(self, space):
-        self.space = space
-        self.structure = space.fromcache(State).empty_structure
-        self.entries = []
-
-    def get(self, w_lookup):
-        space = self.space
-        w_lookup_type = space.type(w_lookup)
-        if space.is_w(w_lookup_type, space.w_str):
-            lookup = space.str_w(w_lookup)
-            i = self.structure.lookup_position(lookup)
-            if i == -1:
-                return None
-            return self.entries[i]
-        elif _is_sane_hash(space, w_lookup_type):
-            return None
-        else:
-            return self._as_rdict().get(w_lookup)
-
-    def setitem(self, w_key, w_value):
-        space = self.space
-        if space.is_w(space.type(w_key), space.w_str):
-            return self.setitem_str(w_key, w_value)
-        else:
-            return self._as_rdict().setitem(w_key, w_value)
-
-    def setitem_str(self, w_key, w_value, shadows_type=True):
-        key = self.space.str_w(w_key)
-        i = self.structure.lookup_position(key)
-        if i != -1:
-            self.entries[i] = w_value
-            return self
-        new_structure = self.structure.get_next_structure(key)
-        self.entries.append(w_value)
-        assert self.structure.length + 1 == new_structure.length
-        self.structure = new_structure
-        assert self.structure.keys[key] >= 0
-        return self
-            
-    def delitem(self, w_key):
-        space = self.space
-        w_key_type = space.type(w_key)
-        if space.is_w(w_key_type, space.w_str):
-            key = space.str_w(w_key)
-            if (self.structure.last_key is not None and
-                key == self.structure.last_key):
-                self.entries.pop()
-                self.structure = self.structure.back_struct
-                return self
-            return self._as_rdict().delitem(w_key)
-        elif _is_sane_hash(space, w_key_type):
-            raise KeyError
-        else:
-            return self._as_rdict().delitem(w_key)
-        
-    def length(self):
-        return self.structure.length
-
-    def iteritems(self):
-        return SharedItemIteratorImplementation(self.space, self)
-
-    def iterkeys(self):
-        return SharedKeyIteratorImplementation(self.space, self)
-
-    def itervalues(self):
-        return SharedValueIteratorImplementation(self.space, self)
-
-    def keys(self):
-        space = self.space
-        return [space.wrap(key)
-                    for (key, item) in self.structure.keys.iteritems()
-                        if item >= 0]
-
-    def values(self):
-        return self.entries[:]
-
-    def items(self):
-        space = self.space
-        return [space.newtuple([space.wrap(key), self.entries[item]])
-                    for (key, item) in self.structure.keys.iteritems()
-                        if item >= 0]
-
-    def _as_rdict(self, as_strdict=False):
-        if as_strdict:
-            newimpl = StrDictImplementation(self.space)
-        else:
-            newimpl = self.space.DefaultDictImpl(self.space)
-        for k, i in self.structure.keys.items():
-            if i >= 0:
-                newimpl.setitem_str(self.space.wrap(k), self.entries[i])
-        return newimpl
-
-
-class SharedValueIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
-        IteratorImplementation.__init__(self, space, dictimplementation)
-        self.values = dictimplementation.entries
-
-    def next(self):
-        if self.pos < self.len:
-            return self.values[self.pos]
-        else:
-            self.values = None
-            return None
-
-class SharedItemIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
-        IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.structure.keys.iteritems()
-
-    def next_entry(self):
-        implementation = self.dictimplementation
-        assert isinstance(implementation, SharedDictImplementation)
-        for key, index in self.iterator:
-            w_value = implementation.entries[index]
-            return self.space.newtuple([self.space.wrap(key), w_value])
-        else:
-            return None
-
-class SharedKeyIteratorImplementation(IteratorImplementation):
-    def __init__(self, space, dictimplementation):
-        IteratorImplementation.__init__(self, space, dictimplementation)
-        self.iterator = dictimplementation.structure.keys.iteritems()
-
-    def next_entry(self):
-        implementation = self.dictimplementation
-        assert isinstance(implementation, SharedDictImplementation)
-        for key, index in self.iterator:
-            return self.space.wrap(key)
-        else:
-            return None
 
 import time, py
 
@@ -1052,6 +866,7 @@
         elif space.config.objspace.std.withdictmeasurement:
             w_self.implementation = MeasuringDictImplementation(space)
         elif space.config.objspace.std.withsharingdict and sharing:
+            from pypy.objspace.std.sharingdict import SharedDictImplementation
             w_self.implementation = SharedDictImplementation(space)
         else:
             w_self.implementation = space.emptydictimpl

Added: pypy/trunk/pypy/objspace/std/sharingdict.py
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/objspace/std/sharingdict.py	Mon Sep 14 12:01:33 2009
@@ -0,0 +1,216 @@
+from pypy.objspace.std.dictmultiobject import DictImplementation, StrDictImplementation
+from pypy.objspace.std.dictmultiobject import IteratorImplementation
+from pypy.objspace.std.dictmultiobject import W_DictMultiObject, _is_sane_hash
+from pypy.rlib.jit import purefunction, hint, we_are_jitted
+from pypy.rlib.rweakref import RWeakValueDictionary
+
+NUM_DIGITS = 4
+
+class SharedStructure(object):
+    _immutable_fields_ = ["keys", "length", "back_struct", "other_structs",
+                          "last_key"]
+
+    def __init__(self, keys=None, length=0,
+                 last_key=None,
+                 back_struct=None):
+        if keys is None:
+            keys = {}
+        self.keys = keys
+        self.length = length
+        self.back_struct = back_struct
+        other_structs = RWeakValueDictionary(SharedStructure)
+        self.other_structs = other_structs
+        self.last_key = last_key
+        self._size_estimate = length << NUM_DIGITS
+        if last_key is not None:
+            assert back_struct is not None
+
+    def new_structure(self, added_key):
+        keys = self.keys.copy()
+        keys[added_key] = len(self.keys)
+        new_structure = SharedStructure(keys, self.length + 1,
+                                        added_key, self)
+        self.other_structs.set(added_key, new_structure)
+        return new_structure
+
+    def lookup_position(self, key):
+        # jit helper
+        self = hint(self, promote=True)
+        key = hint(key, promote=True)
+        return _lookup_position_shared(self, key)
+
+    def get_next_structure(self, key):
+        # jit helper
+        self = hint(self, promote=True)
+        key = hint(key, promote=True)
+        newstruct = _get_next_structure_shared(self, key)
+        if not we_are_jitted():
+            self._size_estimate -= self.size_estimate()
+            self._size_estimate += newstruct.size_estimate()
+        return newstruct
+
+    def size_estimate(self):
+        self = hint(self, promote=True)
+        return _size_estimate(self)
+
+@purefunction
+def _lookup_position_shared(self, key):
+    return self.keys.get(key, -1)
+
+@purefunction
+def _get_next_structure_shared(self, key):
+    new_structure = self.other_structs.get(key)
+    if new_structure is None:
+        new_structure = self.new_structure(key)
+    return new_structure
+
+@purefunction
+def _size_estimate(self):
+    return self._size_estimate >> NUM_DIGITS
+
+
+class State(object):
+    def __init__(self, space):
+        self.empty_structure = SharedStructure()
+        self.emptylist = []
+
+
+class SharedDictImplementation(DictImplementation):
+
+    def __init__(self, space):
+        self.space = space
+        self.structure = space.fromcache(State).empty_structure
+        self.entries = space.fromcache(State).emptylist
+
+    def get(self, w_lookup):
+        space = self.space
+        w_lookup_type = space.type(w_lookup)
+        if space.is_w(w_lookup_type, space.w_str):
+            lookup = space.str_w(w_lookup)
+            i = self.structure.lookup_position(lookup)
+            if i == -1:
+                return None
+            return self.entries[i]
+        elif _is_sane_hash(space, w_lookup_type):
+            return None
+        else:
+            return self._as_rdict().get(w_lookup)
+
+    def setitem(self, w_key, w_value):
+        space = self.space
+        if space.is_w(space.type(w_key), space.w_str):
+            return self.setitem_str(w_key, w_value)
+        else:
+            return self._as_rdict().setitem(w_key, w_value)
+
+    def setitem_str(self, w_key, w_value, shadows_type=True):
+        key = self.space.str_w(w_key)
+        i = self.structure.lookup_position(key)
+        if i != -1:
+            self.entries[i] = w_value
+            return self
+        new_structure = self.structure.get_next_structure(key)
+        if new_structure.length > len(self.entries):
+            new_entries = [None] * new_structure.size_estimate()
+            for i in range(len(self.entries)):
+                new_entries[i] = self.entries[i]
+            self.entries = new_entries
+
+        self.entries[new_structure.length - 1] = w_value
+        assert self.structure.length + 1 == new_structure.length
+        self.structure = new_structure
+        assert self.structure.keys[key] >= 0
+        return self
+            
+    def delitem(self, w_key):
+        space = self.space
+        w_key_type = space.type(w_key)
+        if space.is_w(w_key_type, space.w_str):
+            key = space.str_w(w_key)
+            if (self.structure.last_key is not None and
+                key == self.structure.last_key):
+                self.entries[self.structure.length - 1] = None
+                self.structure = self.structure.back_struct
+                return self
+            return self._as_rdict().delitem(w_key)
+        elif _is_sane_hash(space, w_key_type):
+            raise KeyError
+        else:
+            return self._as_rdict().delitem(w_key)
+        
+    def length(self):
+        return self.structure.length
+
+    def iteritems(self):
+        return SharedItemIteratorImplementation(self.space, self)
+
+    def iterkeys(self):
+        return SharedKeyIteratorImplementation(self.space, self)
+
+    def itervalues(self):
+        return SharedValueIteratorImplementation(self.space, self)
+
+    def keys(self):
+        space = self.space
+        return [space.wrap(key)
+                    for (key, item) in self.structure.keys.iteritems()
+                        if item >= 0]
+
+    def values(self):
+        return self.entries[:self.structure.length]
+
+    def items(self):
+        space = self.space
+        return [space.newtuple([space.wrap(key), self.entries[item]])
+                    for (key, item) in self.structure.keys.iteritems()
+                        if item >= 0]
+
+    def _as_rdict(self, as_strdict=False):
+        if as_strdict:
+            newimpl = StrDictImplementation(self.space)
+        else:
+            newimpl = self.space.DefaultDictImpl(self.space)
+        for k, i in self.structure.keys.items():
+            if i >= 0:
+                newimpl.setitem_str(self.space.wrap(k), self.entries[i])
+        return newimpl
+
+
+class SharedValueIteratorImplementation(IteratorImplementation):
+    def __init__(self, space, dictimplementation):
+        IteratorImplementation.__init__(self, space, dictimplementation)
+        self.values = dictimplementation.entries
+
+    def next(self):
+        if self.pos < self.len:
+            return self.values[self.pos]
+        else:
+            self.values = None
+            return None
+
+class SharedItemIteratorImplementation(IteratorImplementation):
+    def __init__(self, space, dictimplementation):
+        IteratorImplementation.__init__(self, space, dictimplementation)
+        self.iterator = dictimplementation.structure.keys.iteritems()
+
+    def next_entry(self):
+        implementation = self.dictimplementation
+        assert isinstance(implementation, SharedDictImplementation)
+        for key, index in self.iterator:
+            w_value = implementation.entries[index]
+            return self.space.newtuple([self.space.wrap(key), w_value])
+        else:
+            return None
+
+class SharedKeyIteratorImplementation(IteratorImplementation):
+    def __init__(self, space, dictimplementation):
+        IteratorImplementation.__init__(self, space, dictimplementation)
+        self.iterator = dictimplementation.structure.keys.iteritems()
+
+    def next_entry(self):
+        implementation = self.dictimplementation
+        assert isinstance(implementation, SharedDictImplementation)
+        for key, index in self.iterator:
+            return self.space.wrap(key)
+        else:
+            return None

Added: pypy/trunk/pypy/objspace/std/test/test_sharingdict.py
==============================================================================
--- (empty file)
+++ pypy/trunk/pypy/objspace/std/test/test_sharingdict.py	Mon Sep 14 12:01:33 2009
@@ -0,0 +1,29 @@
+from pypy.conftest import gettestobjspace
+from pypy.objspace.std.sharingdict import SharedStructure, NUM_DIGITS
+from pypy.interpreter import gateway
+
+def instance_with_keys(structure, *keys):
+    for key in keys:
+        structure = structure.get_next_structure(key)
+    return structure
+
+def test_size_estimate():
+    empty_structure = SharedStructure()
+    instances = []
+    for i in range(100):
+        instances.append(instance_with_keys(empty_structure, "a", "b", "c", "d", "e", "f"))
+        instances.append(instance_with_keys(empty_structure, "x", "y"))
+    assert empty_structure.size_estimate() == 4
+    assert empty_structure.other_structs.get("a").size_estimate() == 6
+    assert empty_structure.other_structs.get("x").size_estimate() == 2
+
+def test_size_estimate2():
+    empty_structure = SharedStructure()
+    instances = []
+    for i in range(100):
+        instances.append(instance_with_keys(empty_structure, "a", "b", "c", "d", "e", "f"))
+        instances.append(instance_with_keys(empty_structure, "x", "y"))
+        instances.append(instance_with_keys(empty_structure, "x", "y"))
+    assert empty_structure.size_estimate() == 3
+    assert empty_structure.other_structs.get("a").size_estimate() == 6
+    assert empty_structure.other_structs.get("x").size_estimate() == 2



More information about the Pypy-commit mailing list