[Python-checkins] python/dist/src/Objects setobject.c,1.45,1.46

rhettinger@users.sourceforge.net rhettinger at users.sourceforge.net
Thu Aug 11 09:58:56 CEST 2005


Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26888/Objects

Modified Files:
	setobject.c 
Log Message:
* Add short-circuit code for in-place operations with self (such as
  s|=s, s&=s, s-=s, or s^=s).  Add related tests.

* Improve names for several variables and functions.

* Provide alternate table access functions (next, contains, add, and discard)
  that work with an entry argument instead of just a key.  This improves
  set-vs-set operations because we already have a hash value for each key
  and can avoid unnecessary calls to PyObject_Hash().  Provides a 5% to 20%
  speed-up for quick hashing elements like strings and integers.  Provides
  much more substantial improvements for slow hashing elements like tuples
  or objects defining a custom __hash__() function.

* Have difference operations resize() when 1/5 of the elements are dummies.
  Formerly, it was 1/6.  The new ratio triggers less frequently and only
  in cases that it can resize quicker and with greater benefit.  The right
  answer is probably either 1/4, 1/5, or 1/6.  Picked the middle value for
  an even trade-off between resize time and the space/time costs of dummy
  entries.



Index: setobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/setobject.c,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -d -r1.45 -r1.46
--- setobject.c	7 Aug 2005 13:02:53 -0000	1.45
+++ setobject.c	11 Aug 2005 07:58:44 -0000	1.46
@@ -1,3 +1,4 @@
+
 /* set object implementation 
    Written and maintained by Raymond D. Hettinger <python at rcn.com>
    Derived from Lib/sets.py and Objects/dictobject.c.
@@ -226,7 +227,6 @@
 	typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long);
 
 	assert(so->lookup != NULL);
-
 	entry = so->lookup(so, key, hash);
 	if (entry->key == NULL) {
 		/* UNUSED */
@@ -336,18 +336,30 @@
 	return 0;
 }
 
-/* CAUTION: set_add_internal() must guarantee that it won't resize the table */
+/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
+
 static int
-set_add_internal(register PySetObject *so, PyObject *key)
+set_add_entry(register PySetObject *so, setentry *entry)
+{
+	register int n_used;
+
+	assert(so->fill <= so->mask);  /* at least one empty slot */
+	n_used = so->used;
+	Py_INCREF(entry->key);
+	set_insert_key(so, entry->key, entry->hash);
+	if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
+		return 0;
+	return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+}
+
+static int
+set_add_key(register PySetObject *so, PyObject *key)
 {
 	register long hash;
 	register int n_used;
 
-	if (PyString_CheckExact(key)) {
-		hash = ((PyStringObject *)key)->ob_shash;
-		if (hash == -1)
-			hash = PyObject_Hash(key);
-	} else {
+	if (!PyString_CheckExact(key) ||
+	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
 		hash = PyObject_Hash(key);
 		if (hash == -1)
 			return -1;
@@ -365,7 +377,23 @@
 #define DISCARD_FOUND 1
 
 static int
-set_discard_internal(PySetObject *so, PyObject *key)
+set_discard_entry(PySetObject *so, setentry *oldentry)
+{	register setentry *entry;
+	PyObject *old_key;
+
+	entry = (so->lookup)(so, oldentry->key, oldentry->hash);
+	if (entry->key == NULL  ||  entry->key == dummy)
+		return DISCARD_NOTFOUND;
+	old_key = entry->key;
+	Py_INCREF(dummy);
+	entry->key = dummy;
+	so->used--;
+	Py_DECREF(old_key);
+	return DISCARD_FOUND;
+}
+
+static int
+set_discard_key(PySetObject *so, PyObject *key)
 {
 	register long hash;
 	register setentry *entry;
@@ -457,39 +485,39 @@
  * Iterate over a set table.  Use like so:
  *
  *     int pos;
- *     PyObject *key;
+ *     setentry *entry;
  *     pos = 0;   # important!  pos should not otherwise be changed by you
- *     while (set_next_internal(yourset, &pos, &key)) {
- *              Refer to borrowed reference in key.
+ *     while (set_next(yourset, &pos, &entry)) {
+ *              Refer to borrowed reference in entry->key.
  *     }
  *
- * CAUTION:  In general, it isn't safe to use set_next_internal in a loop that
+ * CAUTION:  In general, it isn't safe to use set_next in a loop that
  * mutates the table.  
  */
 static int
-set_next_internal(PySetObject *so, int *pos, PyObject **key)
+set_next(PySetObject *so, int *pos_ptr, setentry **entry_ptr)
 {
 	register int i, mask;
-	register setentry *entry;
+	register setentry *table;
 
 	assert (PyAnySet_Check(so));
-	i = *pos;
+	i = *pos_ptr;
 	if (i < 0)
 		return 0;
-	entry = so->table;
+	table = so->table;
 	mask = so->mask;
-	while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
+	while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
 		i++;
-	*pos = i+1;
+	*pos_ptr = i+1;
 	if (i > mask)
 		return 0;
-	if (key)
-		*key = entry[i].key;
+	if (table[i].key)
+		*entry_ptr = &table[i];
 	return 1;
 }
 
 static int
-set_merge_internal(PySetObject *so, PyObject *otherset)
+set_merge(PySetObject *so, PyObject *otherset)
 {
 	PySetObject *other;
 	register int i;
@@ -525,7 +553,7 @@
 }
 
 static int
-set_contains_internal(PySetObject *so, PyObject *key)
+set_contains_key(PySetObject *so, PyObject *key)
 {
 	long hash;
 
@@ -539,6 +567,15 @@
 	return key != NULL && key != dummy;
 }
 
+static int
+set_contains_entry(PySetObject *so, setentry *entry)
+{
+	PyObject *key;
+
+	key = (so->lookup)(so, entry->key, entry->hash)->key; 
+	return key != NULL && key != dummy;
+}
+
 /***** Set iterator type ***********************************************/
 
 static PyTypeObject PySetIter_Type; /* Forward */
@@ -667,13 +704,13 @@
 	PyObject *key, *it;
 
 	if (PyAnySet_Check(other))
-		return set_merge_internal(so, other);
+		return set_merge(so, other);
 
 	if (PyDict_Check(other)) {
 		PyObject *key, *value;
 		int pos = 0;
 		while (PyDict_Next(other, &pos, &key, &value)) {
-			if (set_add_internal(so, key) == -1)
+			if (set_add_key(so, key) == -1)
 				return -1;
 		}
 		return 0;
@@ -684,7 +721,7 @@
 		return -1;
 
 	while ((key = PyIter_Next(it)) != NULL) {
-                if (set_add_internal(so, key) == -1) {
+                if (set_add_key(so, key) == -1) {
 			Py_DECREF(it);
 			Py_DECREF(key);
 			return -1;
@@ -833,10 +870,10 @@
 set_traverse(PySetObject *so, visitproc visit, void *arg)
 {
 	int pos = 0;
-	PyObject *key;
+	setentry *entry;
 
-	while (set_next_internal(so, &pos, &key))
-		Py_VISIT(key);
+	while (set_next(so, &pos, &entry))
+		Py_VISIT(entry->key);
 	return 0;
 }
 
@@ -897,14 +934,14 @@
 	PyObject *tmpkey;
 	int result;
 
-	result = set_contains_internal(so, key);
+	result = set_contains_key(so, key);
 	if (result == -1 && PyAnySet_Check(key)) {
 		PyErr_Clear();
 		tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
 		if (tmpkey == NULL)
 			return -1;
 		set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
-		result = set_contains_internal(so, tmpkey);
+		result = set_contains_key(so, tmpkey);
 		set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
 		Py_DECREF(tmpkey);
 	}
@@ -943,6 +980,15 @@
 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
 
 static PyObject *
+set_clear(PySetObject *so)
+{
+	set_clear_internal(so);
+	Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
+
+static PyObject *
 set_union(PySetObject *so, PyObject *other)
 {
 	PySetObject *result;
@@ -991,6 +1037,11 @@
 	PySetObject *result;
 	PyObject *key, *it, *tmp;
 
+	if ((PyObject *)so == other) {
+		Py_INCREF(other);
+		return other;
+	}
+
 	result = (PySetObject *)make_new_set(so->ob_type, NULL);
 	if (result == NULL)
 		return NULL;
@@ -1001,11 +1052,12 @@
 		other = tmp;
 	}
 
-	if (PyAnySet_Check(other)) {
+	if (PyAnySet_Check(other)) {		
 		int pos = 0;
-		while (set_next_internal((PySetObject *)other, &pos, &key)) {
-			if (set_contains_internal(so, key)) {
-				if (set_add_internal(result, key) == -1) {
+		setentry *entry;
+		while (set_next((PySetObject *)other, &pos, &entry)) {
+			if (set_contains_entry(so, entry)) {
+				if (set_add_entry(result, entry) == -1) {
 					Py_DECREF(result);
 					return NULL;
 				}
@@ -1021,8 +1073,8 @@
 	}
 
 	while ((key = PyIter_Next(it)) != NULL) {
-		if (set_contains_internal(so, key)) {
-			if (set_add_internal(result, key) == -1) {
+		if (set_contains_key(so, key)) {
+			if (set_add_key(result, key) == -1) {
 				Py_DECREF(it);
 				Py_DECREF(result);
 				Py_DECREF(key);
@@ -1087,32 +1139,48 @@
 	return (PyObject *)so;
 }
 
-static PyObject *
-set_difference_update(PySetObject *so, PyObject *other)
+int
+set_difference_update_internal(PySetObject *so, PyObject *other)
 {
-	PyObject *key, *it;
+	if ((PyObject *)so == other)
+		return set_clear_internal(so);
 	
-	it = PyObject_GetIter(other);
-	if (it == NULL)
-		return NULL;
+	if (PyAnySet_Check(other)) {
+		setentry *entry;
+		int pos = 0;
 
-	while ((key = PyIter_Next(it)) != NULL) {
-		if (set_discard_internal(so, key) == -1) {
-			Py_DECREF(it);
+		while (set_next((PySetObject *)other, &pos, &entry))
+			set_discard_entry(so, entry);
+	} else {
+		PyObject *key, *it;
+		it = PyObject_GetIter(other);
+		if (it == NULL)
+			return -1;
+
+		while ((key = PyIter_Next(it)) != NULL) {
+			if (set_discard_key(so, key) == -1) {
+				Py_DECREF(it);
+				Py_DECREF(key);
+				return -1;
+			}
 			Py_DECREF(key);
-			return NULL;
 		}
-		Py_DECREF(key);
+		Py_DECREF(it);
+		if (PyErr_Occurred())
+			return -1;
 	}
-	Py_DECREF(it);
-	if (PyErr_Occurred())
-		return NULL;
-	/* If more than 1/6 are dummies, then resize them away. */
-	if ((so->fill - so->used) * 6 < so->mask)
+	/* If more than 1/5 are dummies, then resize them away. */
+	if ((so->fill - so->used) * 5 < so->mask)
+		return 0;
+	return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+}
+
+static PyObject *
+set_difference_update(PySetObject *so, PyObject *other)
+{
+	if (set_difference_update_internal(so, other) != -1)
 		Py_RETURN_NONE;
-	if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1)
-		return NULL;
-	Py_RETURN_NONE;
+	return NULL;
 }
 
 PyDoc_STRVAR(difference_update_doc,
@@ -1121,18 +1189,16 @@
 static PyObject *
 set_difference(PySetObject *so, PyObject *other)
 {
-	PyObject *tmp, *key, *result;
+	PyObject *result;
+	setentry *entry;
 	int pos = 0;
 
 	if (!PyAnySet_Check(other)  && !PyDict_Check(other)) {
 		result = set_copy(so);
 		if (result == NULL)
+			return NULL;
+		if (set_difference_update_internal((PySetObject *)result, other) != -1)
 			return result;
-		tmp = set_difference_update((PySetObject *)result, other);
-		if (tmp != NULL) {
-			Py_DECREF(tmp);
-			return result;
-		}
 		Py_DECREF(result);
 		return NULL;
 	}
@@ -1142,18 +1208,21 @@
 		return NULL;
 
 	if (PyDict_Check(other)) {
-		while (set_next_internal(so, &pos, &key)) {
-			if (!PyDict_Contains(other, key)) {
-				if (set_add_internal((PySetObject *)result, key) == -1)
+		while (set_next(so, &pos, &entry)) {
+			setentry entrycopy;
+			entrycopy.hash = entry->hash;
+			entrycopy.key = entry->key;
+			if (!PyDict_Contains(other, entry->key)) {
+				if (set_add_entry((PySetObject *)result, &entrycopy) == -1)
 					return NULL;
 			}
 		}
 		return result;
 	}
 
-	while (set_next_internal(so, &pos, &key)) {
-		if (!set_contains_internal((PySetObject *)other, key)) {
-			if (set_add_internal((PySetObject *)result, key) == -1)
+	while (set_next(so, &pos, &entry)) {
+		if (!set_contains_entry((PySetObject *)other, entry)) {
+			if (set_add_entry((PySetObject *)result, entry) == -1)
 				return NULL;
 		}
 	}
@@ -1197,16 +1266,20 @@
 	PySetObject *otherset;
 	PyObject *key;
 	int pos = 0;
+	setentry *entry;
+
+	if ((PyObject *)so == other)
+		return set_clear(so);
 
 	if (PyDict_Check(other)) {
 		PyObject *value;
 		int rv;
 		while (PyDict_Next(other, &pos, &key, &value)) {
-			rv = set_discard_internal(so, key);
+			rv = set_discard_key(so, key);
 			if (rv == -1)
 				return NULL;
 			if (rv == DISCARD_NOTFOUND) {
-				if (set_add_internal(so, key) == -1)
+				if (set_add_key(so, key) == -1)
 					return NULL;
 			}
 		}
@@ -1222,14 +1295,14 @@
 			return NULL;
 	}
 
-	while (set_next_internal(otherset, &pos, &key)) {
-		int rv = set_discard_internal(so, key);
+	while (set_next(otherset, &pos, &entry)) {
+		int rv = set_discard_entry(so, entry);
 		if (rv == -1) {
 			Py_XDECREF(otherset);
 			return NULL;
 		}
 		if (rv == DISCARD_NOTFOUND) {
-			if (set_add_internal(so, key) == -1) {
+			if (set_add_entry(so, entry) == -1) {
 				Py_XDECREF(otherset);
 				return NULL;
 			}
@@ -1312,7 +1385,7 @@
 	for (i=so->used ; i ; entry++, i--) {
 		while (entry->key == NULL || entry->key==dummy)
 			entry++;
-		if (!set_contains_internal((PySetObject *)other, entry->key))
+		if (!set_contains_entry((PySetObject *)other, entry))
 			Py_RETURN_FALSE;
 	}
 	Py_RETURN_TRUE;
@@ -1448,16 +1521,16 @@
 static int
 set_tp_print(PySetObject *so, FILE *fp, int flags)
 {
-	PyObject *key;
+	setentry *entry;
 	int pos=0;
 	char *emit = "";	/* No separator emitted on first pass */
 	char *separator = ", ";
 
 	fprintf(fp, "%s([", so->ob_type->tp_name);
-	while (set_next_internal(so, &pos, &key)) {
+	while (set_next(so, &pos, &entry)) {
 		fputs(emit, fp);
 		emit = separator;
-		if (PyObject_Print(key, fp, 0) != 0)
+		if (PyObject_Print(entry->key, fp, 0) != 0)
 			return -1;
 	}
 	fputs("])", fp);
@@ -1465,18 +1538,9 @@
 }
 
 static PyObject *
-set_clear(PySetObject *so)
-{
-	set_clear_internal(so);
-	Py_RETURN_NONE;
-}
-
-PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
-
-static PyObject *
 set_add(PySetObject *so, PyObject *key)
 {
-	if (set_add_internal(so, key) == -1)
+	if (set_add_key(so, key) == -1)
 		return NULL;
 	Py_RETURN_NONE;
 }
@@ -1503,7 +1567,7 @@
 		return result;
 	}
 
-	rv = set_discard_internal(so, key);
+	rv = set_discard_key(so, key);
 	if (rv == -1) 
 		return NULL;
 	else if (rv == DISCARD_NOTFOUND) {
@@ -1534,7 +1598,7 @@
 		return result;
 	}
 
-	if (set_discard_internal(so, key) == -1)
+	if (set_discard_key(so, key) == -1)
 		return NULL;
 	Py_RETURN_NONE;
 }



More information about the Python-checkins mailing list