[pypy-svn] r5010 - pypy/trunk/src/pypy/objspace/std
mwh at codespeak.net
mwh at codespeak.net
Sun Jun 6 16:54:31 CEST 2004
Author: mwh
Date: Sun Jun 6 16:54:31 2004
New Revision: 5010
Modified:
pypy/trunk/src/pypy/objspace/std/dictobject.py
pypy/trunk/src/pypy/objspace/std/objspace.py
Log:
implement dicts as hashtables
doesn't make much difference to performance (!) but it needed
doing at some point
Modified: pypy/trunk/src/pypy/objspace/std/dictobject.py
==============================================================================
--- pypy/trunk/src/pypy/objspace/std/dictobject.py (original)
+++ pypy/trunk/src/pypy/objspace/std/dictobject.py Sun Jun 6 16:54:31 2004
@@ -9,41 +9,87 @@
from pypy.interpreter import gateway
from stringobject import W_StringObject
+dummy = object()
class W_DictObject(W_Object):
from pypy.objspace.std.dicttype import dict_typedef as typedef
def __init__(w_self, space, list_pairs_w):
W_Object.__init__(w_self, space)
- w_self.data = [ [space.unwrap(space.hash(w_key)), w_key, w_value]
- for w_key,w_value in list_pairs_w ]
-
+
+ w_self.used = 0
+ w_self.data = [[0, None, None]]
+ w_self.resize(len(list_pairs_w)*2)
+ for w_k, w_v in list_pairs_w:
+ w_self.insert(space.unwrap(space.hash(w_k)), w_k, w_v)
+
def __repr__(w_self):
""" representation for debugging purposes """
return "%s(%s)" % (w_self.__class__.__name__, w_self.data)
- def lookup(self, w_lookup, create=False):
- # this lookup is where most of the start-up time is consumed.
- # Hashing helps a lot.
+ def insert(self, h, w_key, w_value):
+ cell = self.lookdict(h, w_key)
+ if cell[2] is None:
+ self.used += 1
+ cell[:] = [h, w_key, w_value]
+ else:
+ cell[2] = w_value
+
+ def resize(self, minused):
+ newsize = 1
+ while newsize < minused:
+ newsize *= 2
+ od = self.data
+
+ self.used = 0
+ self.data = [[0, None, None] for i in range(newsize)]
+ for h, k, v in od:
+ if v is not None:
+ self.insert(h, k, v)
+
+ def non_empties(self):
+ return [(h, w_k, w_v) for (h, w_k, w_v) in self.data if w_v is not None]
+
+ def lookdict(self, lookup_hash, w_lookup):
space = self.space
- lookup_hash = space.unwrap(space.hash(w_lookup))
- for cell in self.data:
- if (cell[0] == lookup_hash and
- space.is_true(space.eq(w_lookup, cell[1]))):
- break
+ i = lookup_hash % len(self.data)
+
+ entry = self.data[i]
+ if entry[1] is None or \
+ space.is_true(space.is_(w_lookup, entry[1])):
+ return entry
+ if entry[1] is dummy:
+ freeslot = entry
else:
- if not create:
- raise OperationError(space.w_KeyError, w_lookup)
- cell = [lookup_hash, w_lookup, None]
- self.data.append(cell)
- return cell
+ if entry[0] == lookup_hash and space.is_true(
+ space.eq(entry[1], w_lookup)):
+ return entry
+ freeslot = None
+
+ perturb = lookup_hash
+ while 1:
+ # XXX HAAAAAAACK to avoid FutureWarnings :-(
+ i = ((i & 0x1FFFFFFF) << 2) + i + perturb + 1
+ entry = self.data[i%len(self.data)]
+ if entry[1] is None:
+ if freeslot:
+ return freeslot
+ else:
+ return entry
+ if entry[0] == lookup_hash and entry[1] is not dummy \
+ and space.is_true(
+ space.eq(entry[1], w_lookup)):
+ return entry
+ if entry[1] is dummy and freeslot is None:
+ freeslot = entry
+ perturb >>= 5
registerimplementation(W_DictObject)
def unwrap__Dict(space, w_dict):
result = {}
- for hash, w_key, w_value in w_dict.data:
+ for hash, w_key, w_value in w_dict.non_empties():
result[space.unwrap(w_key)] = space.unwrap(w_value)
return result
@@ -68,28 +114,32 @@
space.call_method(w_dict, 'update', w_kwds)
def getitem__Dict_ANY(space, w_dict, w_lookup):
- return w_dict.lookup(w_lookup)[2]
+ entry = w_dict.lookdict(space.unwrap(space.hash(w_lookup)), w_lookup)
+ if entry[2] is not None:
+ return entry[2]
+ else:
+ raise OperationError(space.w_KeyError, w_lookup)
def setitem__Dict_ANY_ANY(space, w_dict, w_newkey, w_newvalue):
- cell = w_dict.lookup(w_newkey, create=True)
- cell[2] = w_newvalue
+ w_dict.insert(space.unwrap(space.hash(w_newkey)), w_newkey, w_newvalue)
+ if 2*w_dict.used > len(w_dict.data):
+ w_dict.resize(2*w_dict.used)
def delitem__Dict_ANY(space, w_dict, w_lookup):
- cell = w_dict.lookup(w_lookup)
- # overwrite the cell with any other one removed from the dictionary
- cell[:] = w_dict.data.pop()
-
+ entry = w_dict.lookdict(space.unwrap(space.hash(w_lookup)), w_lookup)
+ if entry[2] is not None:
+ w_dict.used -= 1
+ entry[1] = dummy
+ entry[2] = None
+ else:
+ raise OperationError(space.w_KeyError, w_lookup)
+
def len__Dict(space, w_dict):
- return space.wrap(len(w_dict.data))
+ return space.wrap(w_dict.used)
def contains__Dict_ANY(space, w_dict, w_lookup):
- try:
- w_dict.lookup(w_lookup)
- except OperationError:
- # assert e.match(space, space.w_KeyError)
- return space.w_False
- else:
- return space.w_True
+ entry = w_dict.lookdict(space.unwrap(space.hash(w_lookup)), w_lookup)
+ return space.newbool(entry[2] is not None)
dict_has_key__Dict_ANY = contains__Dict_ANY
@@ -102,8 +152,8 @@
if space.is_true(space.is_(w_left, w_right)):
return space.w_True
- dataleft = w_left.data
- dataright = w_right.data
+ dataleft = w_left.non_empties()
+ dataright = w_right.non_empties()
if len(dataleft) != len(dataright):
return space.w_False
for hash, w_key, w_value in dataleft:
@@ -117,8 +167,8 @@
def lt__Dict_Dict(space, w_left, w_right):
# Different sizes, no problem
- dataleft = w_left.data
- dataright = w_right.data
+ dataleft = w_left.non_empties()
+ dataright = w_right.non_empties()
if len(dataleft) < len(dataright):
return space.w_True
if len(dataleft) > len(dataright):
@@ -143,28 +193,33 @@
def dict_copy__Dict(space, w_self):
return W_DictObject(space, [(w_key,w_value)
- for hash,w_key,w_value in w_self.data])
+ for hash,w_key,w_value in w_self.data
+ if w_value is not None])
def dict_items__Dict(space, w_self):
return space.newlist([ space.newtuple([w_key,w_value])
- for hash,w_key,w_value in w_self.data ])
+ for hash,w_key,w_value in w_self.data
+ if w_value is not None])
def dict_keys__Dict(space, w_self):
return space.newlist([ w_key
- for hash,w_key,w_value in w_self.data ])
+ for hash,w_key,w_value in w_self.data
+ if w_value is not None])
def dict_values__Dict(space, w_self):
return space.newlist([ w_value
- for hash,w_key,w_value in w_self.data ])
+ for hash,w_key,w_value in w_self.data
+ if w_value is not None])
def dict_clear__Dict(space, w_self):
- w_self.data = []
+ w_self.data = [[0, None, None]]
+ w_self.used = 0
def dict_get__Dict_ANY_ANY(space, w_dict, w_lookup, w_default):
- try:
- return w_dict.lookup(w_lookup)[2]
- except OperationError:
- # assert e.match(space, space.w_KeyError)
+ entry = w_dict.lookdict(space.unwrap(space.hash(w_lookup)), w_lookup)
+ if entry[2] is not None:
+ return entry[2]
+ else:
return w_default
# Now we only handle one implementation of dicts, this one.
Modified: pypy/trunk/src/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/trunk/src/pypy/objspace/std/objspace.py (original)
+++ pypy/trunk/src/pypy/objspace/std/objspace.py Sun Jun 6 16:54:31 2004
@@ -334,7 +334,7 @@
def is_true(self, w_obj):
# XXX don't look!
if isinstance(w_obj, W_DictObject):
- return not not w_obj.data
+ return not not w_obj.used
else:
return DescrOperation.is_true(self, w_obj)
More information about the Pypy-commit
mailing list