[Python-checkins] CVS: python/dist/src/Objects dictobject.c,2.74,2.75

Guido van Rossum gvanrossum@users.sourceforge.net
Sun, 15 Apr 2001 15:16:29 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv18004

Modified Files:
	dictobject.c 
Log Message:
Tentative fix for a problem that Tim discovered at the last moment,
and reported to python-dev: because we were calling dict_resize() in
PyDict_Next(), and because GC's dict_traverse() uses PyDict_Next(),
and because PyTuple_New() can cause GC, and because dict_items() calls
PyTuple_New(), it was possible for dict_items() to have the dict
resized right under its nose.

The solution is convoluted, and touches several places: keys(),
values(), items(), popitem(), PyDict_Next(), and PyDict_SetItem().

There are two parts to it. First, we no longer call dict_resize() in
PyDict_Next(), which seems to solve the immediate problem.  But then
PyDict_SetItem() must have a different policy about when *it* calls
dict_resize(), because we want to guarantee (e.g. for an algorithm
that Jeremy uses in the compiler) that you can loop over a dict using
PyDict_Next() and make changes to the dict as long as those changes
are only value replacements for existing keys using PyDict_SetItem().
This is done by resizing *after* the insertion instead of before, and
by remembering the size before we insert the item, and if the size is
still the same, we don't bother to even check if we might need to
resize.  An additional detail is that if the dict starts out empty, we
must still resize it before the insertion.

That was the first part. :-)

The second part is to make keys(), values(), items(), and popitem()
safe against side effects on the dict caused by allocations, under the
assumption that if the GC can cause arbitrary Python code to run, it
can cause other threads to run, and it's not inconceivable that our
dict could be resized -- it would be insane to write code that relies
on this, but not all code is sane.

Now, I have this nagging feeling that the loops in lookdict probably
are blissfully assuming that doing a simple key comparison does not
change the dict's size.  This is not necessarily true (the keys could
be class instances after all).  But that's a battle for another day.


Index: dictobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v
retrieving revision 2.74
retrieving revision 2.75
diff -C2 -r2.74 -r2.75
*** dictobject.c	2001/03/21 19:23:56	2.74
--- dictobject.c	2001/04/15 22:16:26	2.75
***************
*** 243,248 ****
  				PyErr_Restore(err_type, err_value, err_tb);
  			return ep;
!                 }
!                 else if (ep->me_hash == hash) {
  			if (!checked_error) {
  				checked_error = 1;
--- 243,248 ----
  				PyErr_Restore(err_type, err_value, err_tb);
  			return ep;
! 		}
! 		else if (ep->me_hash == hash) {
  			if (!checked_error) {
  				checked_error = 1;
***************
*** 290,294 ****
  	dictentry *ep0 = mp->ma_table;
  	register dictentry *ep;
!         cmpfunc compare = PyString_Type.tp_compare;
  
  	/* make sure this function doesn't have to handle non-string keys */
--- 290,294 ----
  	dictentry *ep0 = mp->ma_table;
  	register dictentry *ep;
! 	cmpfunc compare = PyString_Type.tp_compare;
  
  	/* make sure this function doesn't have to handle non-string keys */
***************
*** 313,317 ****
  	else {
  		if (ep->me_hash == hash
!                     && compare(ep->me_key, key) == 0) {
  			return ep;
  		}
--- 313,317 ----
  	else {
  		if (ep->me_hash == hash
! 		    && compare(ep->me_key, key) == 0) {
  			return ep;
  		}
***************
*** 339,343 ****
  			     && compare(ep->me_key, key) == 0)) {
  			return ep;
!                 }
  		/* Cycle through GF(2^n)-{0} */
  		incr = incr << 1;
--- 339,343 ----
  			     && compare(ep->me_key, key) == 0)) {
  			return ep;
! 		}
  		/* Cycle through GF(2^n)-{0} */
  		incr = incr << 1;
***************
*** 455,458 ****
--- 455,464 ----
  }
  
+ /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
+  * dictionary if it is merely replacing the value for an existing key.
+  * This is means that it's safe to loop over a dictionary with
+  * PyDict_Next() and occasionally replace a value -- but you can't
+  * insert new keys or remove them.
+  */
  int
  PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
***************
*** 460,463 ****
--- 466,471 ----
  	register dictobject *mp;
  	register long hash;
+ 	register int n_used;
+ 
  	if (!PyDict_Check(op)) {
  		PyErr_BadInternalCall();
***************
*** 486,502 ****
  		if (hash == -1)
  			return -1;
  	}
! 	/* If fill >= 2/3 size, adjust size.  Normally, this doubles the
  	 * size, but it's also possible for the dict to shrink (if ma_fill is
  	 * much larger than ma_used, meaning a lot of dict keys have been
  	 * deleted).
- 	 * CAUTION: this resize logic must match the logic in PyDict_Next.
  	 */
! 	if (mp->ma_fill*3 >= mp->ma_size*2 &&
! 	    dictresize(mp, mp->ma_used*2) != 0)
! 		return -1;
! 	Py_INCREF(value);
! 	Py_INCREF(key);
! 	insertdict(mp, key, hash, value);
  	return 0;
  }
--- 494,522 ----
  		if (hash == -1)
  			return -1;
+ 	}
+ 	if (mp->ma_fill >= mp->ma_size) {
+ 		/* No room for a new key.
+ 		 * This only happens when the dict is empty.
+ 		 * Let dictresize() create a minimal dict.
+ 		 */
+ 		assert(mp->ma_used == 0);
+ 		if (dictresize(mp, 0) != 0)
+ 			return -1;
+ 		assert(mp->ma_fill < mp->ma_size);
  	}
! 	n_used = mp->ma_used;
! 	Py_INCREF(value);
! 	Py_INCREF(key);
! 	insertdict(mp, key, hash, value);
! 	/* If we added a key, we can safely resize.  Otherwise skip this!
! 	 * If fill >= 2/3 size, adjust size.  Normally, this doubles the
  	 * size, but it's also possible for the dict to shrink (if ma_fill is
  	 * much larger than ma_used, meaning a lot of dict keys have been
  	 * deleted).
  	 */
! 	if (mp->ma_used > n_used && mp->ma_fill*3 >= mp->ma_size*2) {
! 		if (dictresize(mp, mp->ma_used*2) != 0)
! 			return -1;
! 	}
  	return 0;
  }
***************
*** 581,601 ****
  	if (i < 0)
  		return 0;
- 
- 	/* A hack to support loops that merely change values.
- 	 * The problem:  PyDict_SetItem() can either grow  or shrink the dict
- 	 * even when passed a key that's already in the dict.  This was a
- 	 * repeated source of subtle bugs, bad enough to justify a hack here.
- 	 * Approach:  If this is the first time PyDict_Next() is being called
- 	 * (i==0), first figure out whether PyDict_SetItem() *will* change the
- 	 * size, and if so get it changed before we start passing out internal
- 	 * indices.
- 	 */
- 	if (i == 0) {
- 		/* This must be a clone of PyDict_SetItem's resize logic. */
- 		if (mp->ma_fill*3 >= mp->ma_size*2 &&
- 		    dictresize(mp, mp->ma_used*2) != 0)
- 			return -1;
- 	}
- 
  	while (i < mp->ma_size && mp->ma_table[i].me_value == NULL)
  		i++;
--- 601,604 ----
***************
*** 758,772 ****
  {
  	register PyObject *v;
! 	register int i, j;
  	if (!PyArg_NoArgs(args))
  		return NULL;
! 	v = PyList_New(mp->ma_used);
  	if (v == NULL)
  		return NULL;
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
  			PyObject *key = mp->ma_table[i].me_key;
  			Py_INCREF(key);
! 			PyList_SetItem(v, j, key);
  			j++;
  		}
--- 761,785 ----
  {
  	register PyObject *v;
! 	register int i, j, n;
! 
  	if (!PyArg_NoArgs(args))
  		return NULL;
!   again:
! 	n = mp->ma_used;
! 	v = PyList_New(n);
  	if (v == NULL)
  		return NULL;
+ 	if (n != mp->ma_used) {
+ 		/* Durnit.  The allocations caused the dict to resize.
+ 		 * Just start over, this shouldn't normally happen.
+ 		 */
+ 		Py_DECREF(v);
+ 		goto again;
+ 	}
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
  			PyObject *key = mp->ma_table[i].me_key;
  			Py_INCREF(key);
! 			PyList_SET_ITEM(v, j, key);
  			j++;
  		}
***************
*** 779,793 ****
  {
  	register PyObject *v;
! 	register int i, j;
  	if (!PyArg_NoArgs(args))
  		return NULL;
! 	v = PyList_New(mp->ma_used);
  	if (v == NULL)
  		return NULL;
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
  			PyObject *value = mp->ma_table[i].me_value;
  			Py_INCREF(value);
! 			PyList_SetItem(v, j, value);
  			j++;
  		}
--- 792,816 ----
  {
  	register PyObject *v;
! 	register int i, j, n;
! 
  	if (!PyArg_NoArgs(args))
  		return NULL;
!   again:
! 	n = mp->ma_used;
! 	v = PyList_New(n);
  	if (v == NULL)
  		return NULL;
+ 	if (n != mp->ma_used) {
+ 		/* Durnit.  The allocations caused the dict to resize.
+ 		 * Just start over, this shouldn't normally happen.
+ 		 */
+ 		Py_DECREF(v);
+ 		goto again;
+ 	}
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
  			PyObject *value = mp->ma_table[i].me_value;
  			Py_INCREF(value);
! 			PyList_SET_ITEM(v, j, value);
  			j++;
  		}
***************
*** 800,826 ****
  {
  	register PyObject *v;
! 	register int i, j;
  	if (!PyArg_NoArgs(args))
  		return NULL;
! 	v = PyList_New(mp->ma_used);
  	if (v == NULL)
  		return NULL;
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
! 			PyObject *key = mp->ma_table[i].me_key;
! 			PyObject *value = mp->ma_table[i].me_value;
! 			PyObject *item = PyTuple_New(2);
! 			if (item == NULL) {
! 				Py_DECREF(v);
! 				return NULL;
! 			}
  			Py_INCREF(key);
! 			PyTuple_SetItem(item, 0, key);
  			Py_INCREF(value);
! 			PyTuple_SetItem(item, 1, value);
! 			PyList_SetItem(v, j, item);
  			j++;
  		}
  	}
  	return v;
  }
--- 823,869 ----
  {
  	register PyObject *v;
! 	register int i, j, n;
! 	PyObject *item, *key, *value;
! 
  	if (!PyArg_NoArgs(args))
  		return NULL;
! 	/* Preallocate the list of tuples, to avoid allocations during
! 	 * the loop over the items, which could trigger GC, which
! 	 * could resize the dict. :-(
! 	 */
!   again:
! 	n = mp->ma_used;
! 	v = PyList_New(n);
  	if (v == NULL)
  		return NULL;
+ 	for (i = 0; i < n; i++) {
+ 		item = PyTuple_New(2);
+ 		if (item == NULL) {
+ 			Py_DECREF(v);
+ 			return NULL;
+ 		}
+ 		PyList_SET_ITEM(v, i, item);
+ 	}
+ 	if (n != mp->ma_used) {
+ 		/* Durnit.  The allocations caused the dict to resize.
+ 		 * Just start over, this shouldn't normally happen.
+ 		 */
+ 		Py_DECREF(v);
+ 		goto again;
+ 	}
+ 	/* Nothing we do below makes any function calls. */
  	for (i = 0, j = 0; i < mp->ma_size; i++) {
  		if (mp->ma_table[i].me_value != NULL) {
! 			key = mp->ma_table[i].me_key;
! 			value = mp->ma_table[i].me_value;
! 			item = PyList_GET_ITEM(v, j);
  			Py_INCREF(key);
! 			PyTuple_SET_ITEM(item, 0, key);
  			Py_INCREF(value);
! 			PyTuple_SET_ITEM(item, 1, value);
  			j++;
  		}
  	}
+ 	assert(j == n);
  	return v;
  }
***************
*** 831,835 ****
  	register int i;
  	dictobject *other;
!         dictentry *entry;
  	if (!PyArg_Parse(args, "O!", &PyDict_Type, &other))
  		return NULL;
--- 874,878 ----
  	register int i;
  	dictobject *other;
! 	dictentry *entry;
  	if (!PyArg_Parse(args, "O!", &PyDict_Type, &other))
  		return NULL;
***************
*** 871,875 ****
  	register int i;
  	dictobject *copy;
!         dictentry *entry;
  
  	if (o == NULL || !PyDict_Check(o)) {
--- 914,918 ----
  	register int i;
  	dictobject *copy;
! 	dictentry *entry;
  
  	if (o == NULL || !PyDict_Check(o)) {
***************
*** 1118,1121 ****
--- 1161,1173 ----
  		return NULL;
  	}
+ 	/* Allocate the result tuple first.  Believe it or not,
+ 	 * this allocation could trigger a garbage collection which
+ 	 * could resize the dict, which would invalidate the pointer
+ 	 * (ep) into the dict calculated below.
+ 	 * So we have to do this first.
+ 	 */
+ 	res = PyTuple_New(2);
+ 	if (res == NULL)
+ 		return NULL;
  	/* Set ep to "the first" dict entry with a value.  We abuse the hash
  	 * field of slot 0 to hold a search finger:
***************
*** 1142,1156 ****
  		}
  	}
! 	res = PyTuple_New(2);
! 	if (res != NULL) {
! 		PyTuple_SET_ITEM(res, 0, ep->me_key);
! 		PyTuple_SET_ITEM(res, 1, ep->me_value);
! 		Py_INCREF(dummy);
! 		ep->me_key = dummy;
! 		ep->me_value = NULL;
! 		mp->ma_used--;
! 		assert(mp->ma_table[0].me_value == NULL);
! 		mp->ma_table[0].me_hash = i + 1;  /* next place to start */
! 	}
  	return res;
  }
--- 1194,1205 ----
  		}
  	}
! 	PyTuple_SET_ITEM(res, 0, ep->me_key);
! 	PyTuple_SET_ITEM(res, 1, ep->me_value);
! 	Py_INCREF(dummy);
! 	ep->me_key = dummy;
! 	ep->me_value = NULL;
! 	mp->ma_used--;
! 	assert(mp->ma_table[0].me_value == NULL);
! 	mp->ma_table[0].me_hash = i + 1;  /* next place to start */
  	return res;
  }