[Python-checkins] python/dist/src/Objects classobject.c,2.160,2.161 dictobject.c,2.128,2.129 stringobject.c,2.181,2.182

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Mon, 19 Aug 2002 14:43:20 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv2517/Objects

Modified Files:
	classobject.c dictobject.c stringobject.c 
Log Message:
SF patch 576101, by Oren Tirosh: alternative implementation of
interning.  I modified Oren's patch significantly, but the basic idea
and most of the implementation are unchanged.  Interned strings created
with PyString_InternInPlace() are now mortal, and you must keep a
reference to the resulting string around; use the new function
PyString_InternImmortal() to create immortal interned strings.



Index: classobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/classobject.c,v
retrieving revision 2.160
retrieving revision 2.161
diff -C2 -d -r2.160 -r2.161
*** classobject.c	11 Jul 2002 06:23:50 -0000	2.160
--- classobject.c	19 Aug 2002 21:43:18 -0000	2.161
***************
*** 2301,2335 ****
  }
  
! static char *
! getclassname(PyObject *class)
  {
  	PyObject *name;
  
  	if (class == NULL)
! 		name = NULL;
! 	else
! 		name = PyObject_GetAttrString(class, "__name__");
  	if (name == NULL) {
  		/* This function cannot return an exception */
  		PyErr_Clear();
! 		return "?";
  	}
! 	if (!PyString_Check(name)) {
! 		Py_DECREF(name);
! 		return "?";
  	}
- 	PyString_InternInPlace(&name);
  	Py_DECREF(name);
- 	return PyString_AS_STRING(name);
  }
  
! static char *
! getinstclassname(PyObject *inst)
  {
  	PyObject *class;
- 	char *name;
  
! 	if (inst == NULL)
! 		return "nothing";
  
  	class = PyObject_GetAttrString(inst, "__class__");
--- 2301,2336 ----
  }
  
! static void
! getclassname(PyObject *class, char *buf, int bufsize)
  {
  	PyObject *name;
  
+ 	assert(bufsize > 1);
+ 	strcpy(buf, "?"); /* Default outcome */
  	if (class == NULL)
! 		return;
! 	name = PyObject_GetAttrString(class, "__name__");
  	if (name == NULL) {
  		/* This function cannot return an exception */
  		PyErr_Clear();
! 		return;
  	}
! 	if (PyString_Check(name)) {
! 		strncpy(buf, PyString_AS_STRING(name), bufsize);
! 		buf[bufsize-1] = '\0';
  	}
  	Py_DECREF(name);
  }
  
! static void
! getinstclassname(PyObject *inst, char *buf, int bufsize)
  {
  	PyObject *class;
  
! 	if (inst == NULL) {
! 		assert(bufsize > strlen("nothing"));
! 		strcpy(buf, "nothing");
! 		return;
! 	}
  
  	class = PyObject_GetAttrString(inst, "__class__");
***************
*** 2340,2346 ****
  		Py_INCREF(class);
  	}
! 	name = getclassname(class);
  	Py_XDECREF(class);
- 	return name;
  }
  
--- 2341,2346 ----
  		Py_INCREF(class);
  	}
! 	getclassname(class, buf, bufsize);
  	Py_XDECREF(class);
  }
  
***************
*** 2367,2370 ****
--- 2367,2374 ----
  		}
  		if (!ok) {
+ 			char clsbuf[256];
+ 			char instbuf[256];
+ 			getclassname(class, clsbuf, sizeof(clsbuf));
+ 			getinstclassname(self, instbuf, sizeof(instbuf));
  			PyErr_Format(PyExc_TypeError,
  				     "unbound method %s%s must be called with "
***************
*** 2373,2378 ****
  				     PyEval_GetFuncName(func),
  				     PyEval_GetFuncDesc(func),
! 				     getclassname(class),
! 				     getinstclassname(self),
  				     self == NULL ? "" : " instance");
  			return NULL;
--- 2377,2382 ----
  				     PyEval_GetFuncName(func),
  				     PyEval_GetFuncDesc(func),
! 				     clsbuf,
! 				     instbuf,
  				     self == NULL ? "" : " instance");
  			return NULL;

Index: dictobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v
retrieving revision 2.128
retrieving revision 2.129
diff -C2 -d -r2.128 -r2.129
*** dictobject.c	17 Jul 2002 16:30:37 -0000	2.128
--- dictobject.c	19 Aug 2002 21:43:18 -0000	2.129
***************
*** 512,524 ****
  	mp = (dictobject *)op;
  	if (PyString_CheckExact(key)) {
! 		if (((PyStringObject *)key)->ob_sinterned != NULL) {
! 			key = ((PyStringObject *)key)->ob_sinterned;
! 			hash = ((PyStringObject *)key)->ob_shash;
! 		}
! 		else {
! 			hash = ((PyStringObject *)key)->ob_shash;
! 			if (hash == -1)
! 				hash = PyObject_Hash(key);
! 		}
  	}
  	else {
--- 512,518 ----
  	mp = (dictobject *)op;
  	if (PyString_CheckExact(key)) {
! 		hash = ((PyStringObject *)key)->ob_shash;
! 		if (hash == -1)
! 			hash = PyObject_Hash(key);
  	}
  	else {

Index: stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.181
retrieving revision 2.182
diff -C2 -d -r2.181 -r2.182
*** stringobject.c	19 Aug 2002 19:26:42 -0000	2.181
--- stringobject.c	19 Aug 2002 21:43:18 -0000	2.182
***************
*** 16,19 ****
--- 16,30 ----
  static PyStringObject *nullstring;
  
+ /* This dictionary holds all interned strings.  Note that references to
+    strings in this dictionary are *not* counted in the string's ob_refcnt.
+    When the interned string reaches a refcnt of 0 the string deallocation
+    function will delete the reference from this dictionary.
+ 
+    Another way to look at this is to say that the actual reference
+    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
+ */
+ static PyObject *interned;
+ 
+ 
  /*
     For both PyString_FromString() and PyString_FromStringAndSize(), the
***************
*** 70,74 ****
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	if (str != NULL)
  		memcpy(op->ob_sval, str, size);
--- 81,85 ----
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sstate = SSTATE_NOT_INTERNED;
  	if (str != NULL)
  		memcpy(op->ob_sval, str, size);
***************
*** 126,130 ****
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	memcpy(op->ob_sval, str, size+1);
  	/* share short strings */
--- 137,141 ----
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sstate = SSTATE_NOT_INTERNED;
  	memcpy(op->ob_sval, str, size+1);
  	/* share short strings */
***************
*** 487,490 ****
--- 498,519 ----
  string_dealloc(PyObject *op)
  {
+ 	switch (PyString_CHECK_INTERNED(op)) {
+ 		case SSTATE_NOT_INTERNED:
+ 			break;
+ 
+ 		case SSTATE_INTERNED_MORTAL:
+ 			/* revive dead object temporarily for DelItem */
+ 			op->ob_refcnt = 3;
+ 			if (PyDict_DelItem(interned, op) != 0)
+ 				Py_FatalError(
+ 					"deletion of interned string failed");
+ 			break;
+ 
+ 		case SSTATE_INTERNED_IMMORTAL:
+ 			Py_FatalError("Immortal interned string died.");
+ 
+ 		default:
+ 			Py_FatalError("Inconsistent interned string state.");
+ 	}
  	op->ob_type->tp_free(op);
  }
***************
*** 886,890 ****
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
  	memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
--- 915,919 ----
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sstate = SSTATE_NOT_INTERNED;
  	memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
  	memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
***************
*** 929,933 ****
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	for (i = 0; i < size; i += a->ob_size)
  		memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
--- 958,962 ----
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sstate = SSTATE_NOT_INTERNED;
  	for (i = 0; i < size; i += a->ob_size)
  		memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
***************
*** 1094,1100 ****
  	if (a->ob_shash != -1)
  		return a->ob_shash;
- 	if (a->ob_sinterned != NULL)
- 		return (a->ob_shash =
- 			((PyStringObject *)(a->ob_sinterned))->ob_shash);
  	len = a->ob_size;
  	p = (unsigned char *) a->ob_sval;
--- 1123,1126 ----
***************
*** 3068,3073 ****
  		((PyStringObject *)pnew)->ob_shash =
  			((PyStringObject *)tmp)->ob_shash;
! 		((PyStringObject *)pnew)->ob_sinterned =
! 			((PyStringObject *)tmp)->ob_sinterned;
  	}
  	Py_DECREF(tmp);
--- 3094,3098 ----
  		((PyStringObject *)pnew)->ob_shash =
  			((PyStringObject *)tmp)->ob_shash;
! 		((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
  	}
  	Py_DECREF(tmp);
***************
*** 3984,4003 ****
  }
  
- 
- 
- /* This dictionary will leak at PyString_Fini() time.  That's acceptable
-  * because PyString_Fini() specifically frees interned strings that are
-  * only referenced by this dictionary.  The CVS log entry for revision 2.45
-  * says:
-  *
-  *    Change the Fini function to only remove otherwise unreferenced
-  *    strings from the interned table.  There are references in
-  *    hard-to-find static variables all over the interpreter, and it's not
-  *    worth trying to get rid of all those; but "uninterning" isn't fair
-  *    either and may cause subtle failures later -- so we have to keep them
-  *    in the interned table.
-  */
- static PyObject *interned;
- 
  void
  PyString_InternInPlace(PyObject **p)
--- 4009,4012 ----
***************
*** 4007,4053 ****
  	if (s == NULL || !PyString_Check(s))
  		Py_FatalError("PyString_InternInPlace: strings only please!");
! 	if ((t = s->ob_sinterned) != NULL) {
! 		if (t == (PyObject *)s)
! 			return;
! 		Py_INCREF(t);
! 		*p = t;
! 		Py_DECREF(s);
  		return;
- 	}
  	if (interned == NULL) {
  		interned = PyDict_New();
! 		if (interned == NULL)
  			return;
  	}
  	if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
  		Py_INCREF(t);
! 		*p = s->ob_sinterned = t;
! 		Py_DECREF(s);
  		return;
  	}
! 	/* Ensure that only true string objects appear in the intern dict,
! 	   and as the value of ob_sinterned. */
! 	if (PyString_CheckExact(s)) {
! 		t = (PyObject *)s;
! 		if (PyDict_SetItem(interned, t, t) == 0) {
! 			s->ob_sinterned = t;
! 			return;
! 		}
! 	}
! 	else {
  		t = PyString_FromStringAndSize(PyString_AS_STRING(s),
  						PyString_GET_SIZE(s));
! 		if (t != NULL) {
! 			if (PyDict_SetItem(interned, t, t) == 0) {
! 				*p = s->ob_sinterned = t;
! 				Py_DECREF(s);
! 				return;
! 			}
! 			Py_DECREF(t);
  		}
  	}
  	PyErr_Clear();
  }
  
  
  PyObject *
--- 4016,4070 ----
  	if (s == NULL || !PyString_Check(s))
  		Py_FatalError("PyString_InternInPlace: strings only please!");
! 	if (PyString_CHECK_INTERNED(s))
  		return;
  	if (interned == NULL) {
  		interned = PyDict_New();
! 		if (interned == NULL) {
! 			PyErr_Clear(); /* Don't leave an exception */
  			return;
+ 		}
  	}
  	if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
  		Py_INCREF(t);
! 		Py_DECREF(*p);
! 		*p = t;
  		return;
  	}
! 	/* Ensure that only true string objects appear in the intern dict */
! 	if (!PyString_CheckExact(s)) {
  		t = PyString_FromStringAndSize(PyString_AS_STRING(s),
  						PyString_GET_SIZE(s));
! 		if (t == NULL) {
! 			PyErr_Clear();
! 			return;
  		}
+ 	} else {
+ 		t = (PyObject*) s;
+ 		Py_INCREF(t);
  	}
+ 
+ 	if (PyDict_SetItem(interned, t, t) == 0) {
+ 		/* The two references in interned are not counted by
+ 		refcnt.  The string deallocator will take care of this */
+ 		((PyObject *)t)->ob_refcnt-=2;
+ 		PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
+ 		Py_DECREF(*p);
+ 		*p = t;
+ 		return;
+ 	}
+ 	Py_DECREF(t);
  	PyErr_Clear();
  }
  
+ void
+ PyString_InternImmortal(PyObject **p)
+ {
+ 	PyString_InternInPlace(p);
+ 	if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
+ 		PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
+ 		Py_INCREF(*p);
+ 	}
+ }
+ 
  
  PyObject *
***************
*** 4071,4097 ****
  	Py_XDECREF(nullstring);
  	nullstring = NULL;
- 	if (interned) {
- 		int pos, changed;
- 		PyObject *key, *value;
- 		do {
- 			changed = 0;
- 			pos = 0;
- 			while (PyDict_Next(interned, &pos, &key, &value)) {
- 				if (key->ob_refcnt == 2 && key == value) {
- 					PyDict_DelItem(interned, key);
- 					changed = 1;
- 				}
- 			}
- 		} while (changed);
- 	}
  }
  
  void _Py_ReleaseInternedStrings(void)
  {
! 	if (interned) {
! 		fprintf(stderr, "releasing interned strings\n");
! 		PyDict_Clear(interned);
! 		Py_DECREF(interned);
! 		interned = NULL;
  	}
  }
--- 4088,4134 ----
  	Py_XDECREF(nullstring);
  	nullstring = NULL;
  }
  
  void _Py_ReleaseInternedStrings(void)
  {
! 	PyObject *keys;
! 	PyStringObject *s;
! 	int i, n;
! 
! 	if (interned == NULL || !PyDict_Check(interned))
! 		return;
! 	keys = PyDict_Keys(interned);
! 	if (keys == NULL || !PyList_Check(keys)) {
! 		PyErr_Clear();
! 		return;
! 	}
! 
! 	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
! 	   detector, interned strings are not forcibly deallocated; rather, we
! 	   give them their stolen references back, and then clear and DECREF
! 	   the interned dict. */
! 	   
! 	fprintf(stderr, "releasing interned strings\n");
! 	n = PyList_GET_SIZE(keys);
! 	for (i = 0; i < n; i++) {
! 		s = (PyStringObject *) PyList_GET_ITEM(keys, i);
! 		switch (s->ob_sstate) {
! 		case SSTATE_NOT_INTERNED:
! 			/* XXX Shouldn't happen */
! 			break;
! 		case SSTATE_INTERNED_IMMORTAL:
! 			s->ob_refcnt += 1;
! 			break;
! 		case SSTATE_INTERNED_MORTAL:
! 			s->ob_refcnt += 2;
! 			break;
! 		default:
! 			Py_FatalError("Inconsistent interned string state.");
! 		}
! 		s->ob_sstate = SSTATE_NOT_INTERNED;
  	}
+ 	Py_DECREF(keys);
+ 	PyDict_Clear(interned);
+ 	Py_DECREF(interned);
+ 	interned = NULL;
  }