[Python-checkins] CVS: python/dist/src/Objects dictobject.c,2.63,2.64

Fred L. Drake python-dev@python.org
Thu, 31 Aug 2000 12:31:40 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv3780/Objects

Modified Files:
	dictobject.c 
Log Message:

Slight performance hack that also avoids requiring the existence of thread
state for dictionaries that have only been indexed by string keys.

See the comments in SourceForge for more.

This closes SourceForge patch #101309.


Index: dictobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v
retrieving revision 2.63
retrieving revision 2.64
diff -C2 -r2.63 -r2.64
*** dictobject.c	2000/08/31 19:04:07	2.63
--- dictobject.c	2000/08/31 19:31:38	2.64
***************
*** 20,23 ****
--- 20,26 ----
  #define MINSIZE 4
  
+ /* define this out if you don't want conversion statistics on exit */
+ #undef SHOW_CONVERSION_COUNTS
+ 
  /*
  Table of irreducible polynomials to efficiently cycle through
***************
*** 83,87 ****
  when it is more than half filled.
  */
! typedef struct {
  	PyObject_HEAD
  	int ma_fill;
--- 86,91 ----
  when it is more than half filled.
  */
! typedef struct dictobject dictobject;
! struct dictobject {
  	PyObject_HEAD
  	int ma_fill;
***************
*** 90,94 ****
  	int ma_poly;
  	dictentry *ma_table;
! } dictobject;
  
  PyObject *
--- 94,116 ----
  	int ma_poly;
  	dictentry *ma_table;
! 	dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
! };
! 
! /* forward declarations */
! static dictentry *
! lookdict_string(dictobject *mp, PyObject *key, long hash);
! 
! #ifdef SHOW_CONVERSION_COUNTS
! static long created = 0L;
! static long converted = 0L;
! 
! static void
! show_counts(void)
! {
! 	fprintf(stderr, "created %ld string dicts\n", created);
! 	fprintf(stderr, "converted %ld to normal dicts\n", converted);
! 	fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
! }
! #endif
  
  PyObject *
***************
*** 100,103 ****
--- 122,128 ----
  		if (dummy == NULL)
  			return NULL;
+ #ifdef SHOW_CONVERSION_COUNTS
+ 		Py_AtExit(show_counts);
+ #endif
  	}
  	mp = PyObject_NEW(dictobject, &PyDict_Type);
***************
*** 109,112 ****
--- 134,141 ----
  	mp->ma_fill = 0;
  	mp->ma_used = 0;
+ 	mp->ma_lookup = lookdict_string;
+ #ifdef SHOW_CONVERSION_COUNTS
+ 	++created;
+ #endif
  	PyObject_GC_Init(mp);
  	return (PyObject *)mp;
***************
*** 130,133 ****
--- 159,166 ----
  (This version is due to Reimer Behrends, some ideas are also due to
  Jyrki Alakuijala and Vladimir Marangozov.)
+ 
+ This function must never return NULL; failures are indicated by returning
+ a dictentry* for which the me_value field is NULL.  Exceptions are never
+ reported by this function, and outstanding exceptions are maintained.
  */
  static dictentry *
***************
*** 227,230 ****
--- 260,340 ----
  
  /*
+  * Hacked up version of lookdict which can assume keys are always strings;
+  * this assumption allows testing for errors during PyObject_Compare() to
+  * be dropped; string-string comparisons never raise exceptions.  This also
+  * means we don't need to go through PyObject_Compare(); we can always use
+  * the tp_compare slot of the string type object directly.
+  *
+  * This really only becomes meaningful if proper error handling in lookdict()
+  * is too expensive.
+  */
+ static dictentry *
+ lookdict_string(dictobject *mp, PyObject *key, register long hash)
+ {
+ 	register int i;
+ 	register unsigned incr;
+ 	register dictentry *freeslot;
+ 	register unsigned int mask = mp->ma_size-1;
+ 	dictentry *ep0 = mp->ma_table;
+ 	register dictentry *ep;
+         cmpfunc compare = PyString_Type.tp_compare;
+ 
+ 	/* make sure this function doesn't have to handle non-string keys */
+ 	if (!PyString_Check(key)) {
+ #ifdef SHOW_CONVERSION_COUNTS
+ 		++converted;
+ #endif
+ 		mp->ma_lookup = lookdict;
+ 		return lookdict(mp, key, hash);
+ 	}
+ 	/* We must come up with (i, incr) such that 0 <= i < ma_size
+ 	   and 0 < incr < ma_size and both are a function of hash */
+ 	i = (~hash) & mask;
+ 	/* We use ~hash instead of hash, as degenerate hash functions, such
+ 	   as for ints <sigh>, can have lots of leading zeros. It's not
+ 	   really a performance risk, but better safe than sorry. */
+ 	ep = &ep0[i];
+ 	if (ep->me_key == NULL || ep->me_key == key)
+ 		return ep;
+ 	if (ep->me_key == dummy)
+ 		freeslot = ep;
+ 	else {
+ 		if (ep->me_hash == hash
+                     && compare(ep->me_key, key) == 0) {
+ 			return ep;
+ 		}
+ 		freeslot = NULL;
+ 	}
+ 	/* Derive incr from hash, just to make it more arbitrary. Note that
+ 	   incr must not be 0, or we will get into an infinite loop.*/
+ 	incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
+ 	if (!incr)
+ 		incr = mask;
+ 	for (;;) {
+ 		ep = &ep0[(i+incr)&mask];
+ 		if (ep->me_key == NULL) {
+ 			if (freeslot != NULL)
+ 				return freeslot;
+ 			else
+ 				return ep;
+ 		}
+ 		if (ep->me_key == dummy) {
+ 			if (freeslot == NULL)
+ 				freeslot = ep;
+ 		}
+ 		else if (ep->me_key == key
+ 			 || (ep->me_hash == hash
+ 			     && compare(ep->me_key, key) == 0)) {
+ 			return ep;
+                 }
+ 		/* Cycle through GF(2^n)-{0} */
+ 		incr = incr << 1;
+ 		if (incr > mask)
+ 			incr ^= mp->ma_poly; /* This will implicitly clear
+ 						the highest bit */
+ 	}
+ }
+ 
+ /*
  Internal routine to insert a new item into the table.
  Used both by the internal resize routine and by the public insert routine.
***************
*** 236,240 ****
  	PyObject *old_value;
  	register dictentry *ep;
! 	ep = lookdict(mp, key, hash);
  	if (ep->me_value != NULL) {
  		old_value = ep->me_value;
--- 346,350 ----
  	PyObject *old_value;
  	register dictentry *ep;
! 	ep = (mp->ma_lookup)(mp, key, hash);
  	if (ep->me_value != NULL) {
  		old_value = ep->me_value;
***************
*** 313,320 ****
  {
  	long hash;
  	if (!PyDict_Check(op)) {
  		return NULL;
  	}
! 	if (((dictobject *)op)->ma_table == NULL)
  		return NULL;
  #ifdef CACHE_HASH
--- 423,431 ----
  {
  	long hash;
+ 	dictobject *mp = (dictobject *)op;
  	if (!PyDict_Check(op)) {
  		return NULL;
  	}
! 	if (mp->ma_table == NULL)
  		return NULL;
  #ifdef CACHE_HASH
***************
*** 329,333 ****
  		}
  	}
! 	return lookdict((dictobject *)op, key, hash) -> me_value;
  }
  
--- 440,444 ----
  		}
  	}
! 	return (mp->ma_lookup)(mp, key, hash)->me_value;
  }
  
***************
*** 401,405 ****
  	if (((dictobject *)op)->ma_table == NULL)
  		goto empty;
! 	ep = lookdict(mp, key, hash);
  	if (ep->me_value == NULL) {
  	empty:
--- 512,516 ----
  	if (((dictobject *)op)->ma_table == NULL)
  		goto empty;
! 	ep = (mp->ma_lookup)(mp, key, hash);
  	if (ep->me_value == NULL) {
  	empty:
***************
*** 584,588 ****
  			return NULL;
  	}
! 	v = lookdict(mp, key, hash) -> me_value;
  	if (v == NULL)
  		PyErr_SetObject(PyExc_KeyError, key);
--- 695,699 ----
  			return NULL;
  	}
! 	v = (mp->ma_lookup)(mp, key, hash) -> me_value;
  	if (v == NULL)
  		PyErr_SetObject(PyExc_KeyError, key);
***************
*** 913,918 ****
  				PyErr_Clear(); /* Don't want errors here */
  		}
! 		aval = lookdict(a, akey, ahash) -> me_value;
! 		bval = lookdict(b, bkey, bhash) -> me_value;
  		res = PyObject_Compare(aval, bval);
  		if (res != 0)
--- 1024,1029 ----
  				PyErr_Clear(); /* Don't want errors here */
  		}
! 		aval = (a->ma_lookup)(a, akey, ahash) -> me_value;
! 		bval = (b->ma_lookup)(b, bkey, bhash) -> me_value;
  		res = PyObject_Compare(aval, bval);
  		if (res != 0)
***************
*** 949,953 ****
  			return NULL;
  	}
! 	ok = mp->ma_size != 0 && lookdict(mp, key, hash)->me_value != NULL;
  	return PyInt_FromLong(ok);
  }
--- 1060,1065 ----
  			return NULL;
  	}
! 	ok = (mp->ma_size != 0
! 	      && (mp->ma_lookup)(mp, key, hash)->me_value != NULL);
  	return PyInt_FromLong(ok);
  }
***************
*** 975,979 ****
  			return NULL;
  	}
! 	val = lookdict(mp, key, hash)->me_value;
  
    finally:
--- 1087,1091 ----
  			return NULL;
  	}
! 	val = (mp->ma_lookup)(mp, key, hash)->me_value;
  
    finally:
***************
*** 1007,1011 ****
  			return NULL;
  	}
! 	val = lookdict(mp, key, hash)->me_value;
  
    finally:
--- 1119,1123 ----
  			return NULL;
  	}
! 	val = (mp->ma_lookup)(mp, key, hash)->me_value;
  
    finally: