[Python-checkins] python/dist/src/Modules itertoolsmodule.c, 1.25, 1.26

rhettinger at users.sourceforge.net rhettinger at users.sourceforge.net
Wed Nov 12 09:32:28 EST 2003


Update of /cvsroot/python/python/dist/src/Modules
In directory sc8-pr-cvs1:/tmp/cvs-serv16547/Modules

Modified Files:
	itertoolsmodule.c 
Log Message:
Improve the implementation of itertools.tee().

Formerly, underlying queue was implemented in terms of two lists.  The
new queue is a series of singly-linked fixed length lists.

The new implementation runs much faster, supports multi-way tees, and
allows tees of tees without additional memory costs.

The root ideas for this structure were contributed by Andrew Koenig
and Guido van Rossum.



Index: itertoolsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** itertoolsmodule.c	28 Oct 2003 07:32:28 -0000	1.25
--- itertoolsmodule.c	12 Nov 2003 14:32:26 -0000	1.26
***************
*** 8,127 ****
  */
  
! /* independent iterator object supporting the tee object ***************/
! 
! /* The tee object maintains a queue of data seen by the leading iterator
!    but not seen by the trailing iterator.  When the leading iterator
!    gets data from PyIter_Next() it appends a copy to the inbasket stack.
!    When the trailing iterator needs data, it is popped from the outbasket
!    stack.  If the outbasket stack is empty, then it is filled from the
!    inbasket (i.e. the queue is implemented using two stacks so that only
!    O(n) operations like append() and pop() are used to access data and
!    calls to reverse() never move any data element more than once).
  
!    If one of the independent iterators gets deallocated, it sets tee's
!    save_mode to zero so that future calls to PyIter_Next() stop getting
!    saved to the queue (because there is no longer a second iterator that
!    may need the data).
  */
  
  typedef struct {
  	PyObject_HEAD
  	PyObject *it;
! 	PyObject *inbasket;
! 	PyObject *outbasket;
! 	int save_mode;
! 	int num_seen;
! } teeobject;
  
  typedef struct {
  	PyObject_HEAD
! 	teeobject *tee;
! 	int num_seen;
! } iiobject;
  
! static PyTypeObject ii_type;
  
  static PyObject *
! ii_next(iiobject *lz)
  {
! 	teeobject *to = lz->tee;
! 	PyObject *result, *tmp;
! 	int n;
  
! 	if (lz->num_seen == to->num_seen) { 
! 		/* This instance is leading, use iter to get more data */
! 		result = PyIter_Next(to->it);
! 		if (result == NULL)
! 			return NULL;
! 		if (to->save_mode)
! 			if(PyList_Append(to->inbasket, result) == -1){
! 				Py_DECREF(result);
! 				return NULL;
! 			}
! 		to->num_seen++;
! 		lz->num_seen++;
! 		return result;
! 	}
  
! 	/* This instance is trailing, get data from the queue */
! 	if (PyList_GET_SIZE(to->outbasket) == 0) {
! 		/* outbasket is empty, so refill from the inbasket */
! 		tmp = to->outbasket;
! 		to->outbasket = to->inbasket;
! 		to->inbasket = tmp;
! 		PyList_Reverse(to->outbasket);
! 	}
  
! 	/* Pop result from to->outbasket */
! 	n = PyList_GET_SIZE(to->outbasket);
! 	assert(n>0);
! 	result = PyList_GET_ITEM(to->outbasket, n-1);
! 	Py_INCREF(result);
! 	if (PyList_SetSlice(to->outbasket, n-1, n, NULL) == -1) {
! 		Py_DECREF(result);
! 		return NULL;
! 	}
! 	lz->num_seen++;
! 	return result;
  }
  
! static void
! ii_dealloc(iiobject *ii)
  {
! 	teeobject *to = ii->tee;
! 	int n;
  
! 	PyObject_GC_UnTrack(ii);
! 	ii->tee->save_mode = 0;  /* Stop saving data */
! 	if (ii->num_seen < to->num_seen) {
! 		/* It is the trailing iterator being freed; this means 
! 		   that the stored queue data is no longer needed */
! 		n = PyList_GET_SIZE(to->inbasket);
! 		PyList_SetSlice(to->inbasket, 0, n, NULL);
! 		n = PyList_GET_SIZE(to->outbasket);
! 		PyList_SetSlice(to->outbasket, 0, n, NULL);
  	}
! 	Py_XDECREF(ii->tee);
! 	PyObject_GC_Del(ii);
  }
  
! static int
! ii_traverse(iiobject *ii, visitproc visit, void *arg)
  {
! 	if (ii->tee)
! 		return visit((PyObject *)(ii->tee), arg);
! 	return 0;
  }
  
! PyDoc_STRVAR(ii_doc, "Independent iterator created by tee(iterable).");
  
! static PyTypeObject ii_type = {
  	PyObject_HEAD_INIT(&PyType_Type)
  	0,					/* ob_size */
! 	"itertools.tee_iterator",		/* tp_name */
! 	sizeof(iiobject),			/* tp_basicsize */
  	0,					/* tp_itemsize */
  	/* methods */
! 	(destructor)ii_dealloc,			/* tp_dealloc */
  	0,					/* tp_print */
  	0,					/* tp_getattr */
--- 8,107 ----
  */
  
! /* tee object and with supporting function and objects ***************/
  
! /* The teedataobject pre-allocates space for LINKCELLS number of objects.
!    To help the object fit neatly inside cache lines (space for 16 to 32
!    pointers), the value should be a multiple of 16 minus  space for 
!    the other structure members including PyHEAD overhead.  The larger the
!    value, the less memory overhead per object and the less time spent
!    allocating/deallocating new links.  The smaller the number, the less
!    wasted space and the more rapid freeing of older data.
  */
+ #define LINKCELLS 57
  
  typedef struct {
  	PyObject_HEAD
  	PyObject *it;
! 	int numread;
! 	PyObject *nextlink;
! 	PyObject *(values[LINKCELLS]);
! } teedataobject;
  
  typedef struct {
  	PyObject_HEAD
! 	teedataobject *dataobj;
! 	int index;
! } teeobject;
  
! static PyTypeObject teedataobject_type;
  
  static PyObject *
! teedataobject_new(PyObject *it)
  {
! 	teedataobject *tdo;
  
! 	tdo = PyObject_New(teedataobject, &teedataobject_type);
! 	if (tdo == NULL)
! 		return NULL;
  
! 	tdo->numread = 0;
! 	tdo->nextlink = NULL;
! 	Py_INCREF(it);
! 	tdo->it = it;
! 	return (PyObject *)tdo;
! }
  
! static PyObject *
! teedataobject_jumplink(teedataobject *tdo)
! {
! 	if (tdo->nextlink == NULL)
! 		tdo->nextlink = teedataobject_new(tdo->it);
! 	Py_INCREF(tdo->nextlink);
! 	return tdo->nextlink;
  }
  
! static PyObject *
! teedataobject_getitem(teedataobject *tdo, int i)
  {
! 	PyObject *value;
  
! 	assert(i < LINKCELLS);
! 	if (i < tdo->numread)
! 		value = tdo->values[i];
! 	else {
! 		/* this is the lead iterator, so fetch more data */
! 		assert(i == tdo->numread);
! 		value = PyIter_Next(tdo->it);
! 		if (value == NULL)
! 			return NULL;
! 		tdo->numread++;
! 		tdo->values[i] = value;
  	}
! 	Py_INCREF(value);
! 	return value;
  }
  
! static void
! teedataobject_dealloc(teedataobject *tdo)
  {
! 	int i;
! 
! 	for (i=0 ; i<tdo->numread ; i++)
! 		Py_DECREF(tdo->values[i]);
! 	Py_XDECREF(tdo->it);
! 	Py_XDECREF(tdo->nextlink);
! 	PyObject_Del(tdo);
  }
  
! PyDoc_STRVAR(teedataobject_doc, "Data container common to multiple tee objects.");
  
! static PyTypeObject teedataobject_type = {
  	PyObject_HEAD_INIT(&PyType_Type)
  	0,					/* ob_size */
! 	"itertools.tee_dataobject",		/* tp_name */
! 	sizeof(teedataobject),			/* tp_basicsize */
  	0,					/* tp_itemsize */
  	/* methods */
! 	(destructor)teedataobject_dealloc,	/* tp_dealloc */
  	0,					/* tp_print */
  	0,					/* tp_getattr */
***************
*** 138,247 ****
  	0,					/* tp_setattro */
  	0,					/* tp_as_buffer */
! 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
! 	ii_doc,					/* tp_doc */
! 	(traverseproc)ii_traverse,		/* tp_traverse */
! 	0,					/* tp_clear */
! 	0,					/* tp_richcompare */	
! 	0,					/* tp_weaklistoffset */
! 	PyObject_SelfIter,			/* tp_iter */
! 	(iternextfunc)ii_next,			/* tp_iternext */
! 	0,					/* tp_methods */
  };
  
- /* tee object **********************************************************/
  
  static PyTypeObject tee_type;
  
  static PyObject *
! tee_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  {
! 	PyObject *it = NULL;
! 	PyObject *iterable;
! 	PyObject *inbasket = NULL, *outbasket = NULL, *result = NULL;
! 	teeobject *to = NULL;
! 	int i;
  
! 	if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
  		return NULL;
  
! 	it = PyObject_GetIter(iterable);
! 	if (it == NULL) goto fail;
! 
! 	inbasket = PyList_New(0);
! 	if (inbasket == NULL) goto fail;
  
! 	outbasket = PyList_New(0);
! 	if (outbasket == NULL) goto fail;
  
! 	to = PyObject_GC_New(teeobject, &tee_type);
! 	if (to == NULL)  goto fail;
  
! 	to->it = it;
! 	to->inbasket = inbasket;
! 	to->outbasket = outbasket;
! 	to->save_mode = 1;
! 	to->num_seen = 0;
! 	PyObject_GC_Track(to);
  
! 	/* create independent iterators */
! 	result = PyTuple_New(2);
! 	if (result == NULL)  goto fail;
! 	for (i=0 ; i<2 ; i++) {
! 		iiobject *indep_it = PyObject_GC_New(iiobject, &ii_type);
! 		if (indep_it == NULL) goto fail;
! 		Py_INCREF(to);
! 		indep_it->tee = to;
! 		indep_it->num_seen = 0;
! 		PyObject_GC_Track(indep_it);
! 		PyTuple_SET_ITEM(result, i, (PyObject *)indep_it);
  	}
! 	goto succeed;
! fail:
  	Py_XDECREF(it);
! 	Py_XDECREF(inbasket);
! 	Py_XDECREF(outbasket);
! 	Py_XDECREF(result);
! succeed:
! 	Py_XDECREF(to);
! 	return result;
  }
  
! static void
! tee_dealloc(teeobject *to)
  {
! 	PyObject_GC_UnTrack(to);
! 	Py_XDECREF(to->inbasket);
! 	Py_XDECREF(to->outbasket);
! 	Py_XDECREF(to->it);
! 	PyObject_GC_Del(to);
  }
  
! static int
! tee_traverse(teeobject *to, visitproc visit, void *arg)
  {
! 	int err;
! 
! 	if (to->it) {
! 		err = visit(to->it, arg);
! 		if (err)
! 			return err;
! 	}
! 	if (to->inbasket) {
! 		err = visit(to->inbasket, arg);
! 		if (err)
! 			return err;
! 	}
! 	if (to->outbasket) {
! 		err = visit(to->outbasket, arg);
! 		if (err)
! 			return err;
! 	}
! 	return 0;
  }
  
! PyDoc_STRVAR(tee_doc,
! "tee(iterable) --> (it1, it2)\n\
! \n\
! Split the iterable into two independent iterables.");
  
  static PyTypeObject tee_type = {
--- 118,211 ----
  	0,					/* tp_setattro */
  	0,					/* tp_as_buffer */
! 	Py_TPFLAGS_DEFAULT,			/* tp_flags */
! 	teedataobject_doc,			/* tp_doc */
! 	0,					/* tp_traverse */
  };
  
  
  static PyTypeObject tee_type;
  
  static PyObject *
! tee_next(teeobject *to)
  {
! 	PyObject *value, *link;
  
! 	if (to->index >= LINKCELLS) {
! 		link = teedataobject_jumplink(to->dataobj);
! 		Py_XDECREF(to->dataobj);
! 		to->dataobj = (teedataobject *)link;
! 		to->index = 0;
! 	}
! 	value = teedataobject_getitem(to->dataobj, to->index);
! 	if (value == NULL)
  		return NULL;
+ 	to->index++;
+ 	return value;
+ }
  
! static PyObject *
! tee_copy(teeobject *to)
! {
! 	teeobject *newto;
  
! 	newto = PyObject_New(teeobject, &tee_type);
! 	if (newto == NULL)
! 		return NULL;
! 	Py_INCREF(to->dataobj);
! 	newto->dataobj = to->dataobj;
! 	newto->index = to->index;
! 	return (PyObject *)newto;
! }
  
! PyDoc_STRVAR(teecopy_doc, "Returns an independent iterator.");
  
! static PyObject *
! tee_fromiterable(PyObject *iterable)
! {
! 	teeobject *to;
! 	PyObject *it = NULL;
  
! 	it = PyObject_GetIter(iterable);
! 	if (it == NULL)
! 		return NULL;
! 	if (PyObject_TypeCheck(it, &tee_type)) {
! 		to = (teeobject *)tee_copy((teeobject *)it);
! 		goto done;
  	}
! 
! 	to = PyObject_New(teeobject, &tee_type);
! 	if (to == NULL) 
! 		goto done;
! 	to->dataobj = (teedataobject *)teedataobject_new(it);
! 	to->index = 0;
! done:
  	Py_XDECREF(it);
! 	return (PyObject *)to;
  }
  
! static PyObject *
! tee_new(PyTypeObject *type, PyObject *args, PyObject *kw)
  {
! 	PyObject *iterable;
! 
! 	if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
! 		return NULL;
! 	return tee_fromiterable(iterable);
  }
  
! static void
! tee_dealloc(teeobject *to)
  {
! 	Py_XDECREF(to->dataobj);
! 	PyObject_Del(to);
  }
  
! PyDoc_STRVAR(teeobject_doc,
! "Iterator wrapped to make it copyable");
! 
! static PyMethodDef tee_methods[] = {
! 	{"__copy__",	(PyCFunction)tee_copy,	METH_NOARGS, teecopy_doc},
!  	{NULL,		NULL}		/* sentinel */
! };
  
  static PyTypeObject tee_type = {
***************
*** 267,279 ****
  	0,				/* tp_setattro */
  	0,				/* tp_as_buffer */
! 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,	/* tp_flags */
! 	tee_doc,			/* tp_doc */
! 	(traverseproc)tee_traverse,	/* tp_traverse */
  	0,				/* tp_clear */
  	0,				/* tp_richcompare */
  	0,				/* tp_weaklistoffset */
! 	0,				/* tp_iter */
! 	0,				/* tp_iternext */
! 	0,				/* tp_methods */
  	0,				/* tp_members */
  	0,				/* tp_getset */
--- 231,243 ----
  	0,				/* tp_setattro */
  	0,				/* tp_as_buffer */
! 	Py_TPFLAGS_DEFAULT,		/* tp_flags */
! 	teeobject_doc,			/* tp_doc */
! 	0,				/* tp_traverse */
  	0,				/* tp_clear */
  	0,				/* tp_richcompare */
  	0,				/* tp_weaklistoffset */
! 	PyObject_SelfIter,		/* tp_iter */
! 	(iternextfunc)tee_next,		/* tp_iternext */
! 	tee_methods,			/* tp_methods */
  	0,				/* tp_members */
  	0,				/* tp_getset */
***************
*** 286,291 ****
--- 250,299 ----
  	0,				/* tp_alloc */
  	tee_new,			/* tp_new */
+ 	PyObject_Del,			/* tp_free */
  };
  
+ static PyObject *
+ tee(PyObject *self, PyObject *args)
+ {
+ 	int i, n=2;
+ 	PyObject *it, *iterable, *copyable, *result;
+ 
+ 	if (!PyArg_ParseTuple(args, "O|i", &iterable, &n))
+ 		return NULL;
+ 	result = PyTuple_New(n);
+ 	if (result == NULL)
+ 		return NULL;
+ 	if (n == 0)
+ 		return result;
+ 	it = PyObject_GetIter(iterable);
+ 	if (it == NULL) {
+ 		Py_DECREF(result);
+ 		return NULL;
+ 	}
+ 	if (!PyObject_HasAttrString(it, "__copy__")) {
+ 		copyable = tee_fromiterable(it);
+ 		Py_DECREF(it);
+ 		if (copyable == NULL) {
+ 			Py_DECREF(result);
+ 			return NULL;
+ 		}
+ 	} else
+ 		copyable = it;
+ 	PyTuple_SET_ITEM(result, 0, copyable);
+ 	for (i=1 ; i<n ; i++) {
+ 		copyable = PyObject_CallMethod(copyable, "__copy__", NULL);
+ 		if (copyable == NULL) {
+ 			Py_DECREF(result);
+ 			return NULL;
+ 		}
+ 		PyTuple_SET_ITEM(result, i, copyable);
+ 	}
+ 	return result;
+ }
+ 
+ PyDoc_STRVAR(tee_doc,
+ "tee(iterable, n=2) --> tuple of n independent iterators.");
+ 
+ 
  /* cycle object **********************************************************/
  
***************
*** 2092,2096 ****
  imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\
  starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
! tee(it) --> (it1, it2) splits one iterator into two \n\
  chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
  takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
--- 2100,2104 ----
  imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\
  starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
! tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
  chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
  takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
***************
*** 2099,2102 ****
--- 2107,2115 ----
  
  
+ static PyMethodDef module_methods[] = {
+ 	{"tee",	(PyCFunction)tee,	METH_VARARGS, tee_doc},
+  	{NULL,		NULL}		/* sentinel */
+ };
+ 
  PyMODINIT_FUNC
  inititertools(void)
***************
*** 2106,2110 ****
  	char *name;
  	PyTypeObject *typelist[] = {
- 		&tee_type,
  		&cycle_type,
  		&dropwhile_type,
--- 2119,2122 ----
***************
*** 2122,2126 ****
  	};
  
! 	m = Py_InitModule3("itertools", NULL, module_doc);
  
  	for (i=0 ; typelist[i] != NULL ; i++) {
--- 2134,2138 ----
  	};
  
! 	m = Py_InitModule3("itertools", module_methods, module_doc);
  
  	for (i=0 ; typelist[i] != NULL ; i++) {
***************
*** 2132,2134 ****
--- 2144,2152 ----
  		PyModule_AddObject(m, name+1, (PyObject *)typelist[i]);
  	}
+ 
+ 	if (PyType_Ready(&teedataobject_type) < 0)
+ 		return;
+ 	if (PyType_Ready(&tee_type) < 0)
+ 		return;
+ 
  }





More information about the Python-checkins mailing list