[Python-checkins] python/dist/src/Modules itertoolsmodule.c, 1.25,
1.26
rhettinger at users.sourceforge.net
rhettinger at users.sourceforge.net
Wed Nov 12 09:32:28 EST 2003
Update of /cvsroot/python/python/dist/src/Modules
In directory sc8-pr-cvs1:/tmp/cvs-serv16547/Modules
Modified Files:
itertoolsmodule.c
Log Message:
Improve the implementation of itertools.tee().
Formerly, underlying queue was implemented in terms of two lists. The
new queue is a series of singly-linked fixed length lists.
The new implementation runs much faster, supports multi-way tees, and
allows tees of tees without additional memory costs.
The root ideas for this structure were contributed by Andrew Koenig
and Guido van Rossum.
Index: itertoolsmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** itertoolsmodule.c 28 Oct 2003 07:32:28 -0000 1.25
--- itertoolsmodule.c 12 Nov 2003 14:32:26 -0000 1.26
***************
*** 8,127 ****
*/
! /* independent iterator object supporting the tee object ***************/
!
! /* The tee object maintains a queue of data seen by the leading iterator
! but not seen by the trailing iterator. When the leading iterator
! gets data from PyIter_Next() it appends a copy to the inbasket stack.
! When the trailing iterator needs data, it is popped from the outbasket
! stack. If the outbasket stack is empty, then it is filled from the
! inbasket (i.e. the queue is implemented using two stacks so that only
! O(n) operations like append() and pop() are used to access data and
! calls to reverse() never move any data element more than once).
! If one of the independent iterators gets deallocated, it sets tee's
! save_mode to zero so that future calls to PyIter_Next() stop getting
! saved to the queue (because there is no longer a second iterator that
! may need the data).
*/
typedef struct {
PyObject_HEAD
PyObject *it;
! PyObject *inbasket;
! PyObject *outbasket;
! int save_mode;
! int num_seen;
! } teeobject;
typedef struct {
PyObject_HEAD
! teeobject *tee;
! int num_seen;
! } iiobject;
! static PyTypeObject ii_type;
static PyObject *
! ii_next(iiobject *lz)
{
! teeobject *to = lz->tee;
! PyObject *result, *tmp;
! int n;
! if (lz->num_seen == to->num_seen) {
! /* This instance is leading, use iter to get more data */
! result = PyIter_Next(to->it);
! if (result == NULL)
! return NULL;
! if (to->save_mode)
! if(PyList_Append(to->inbasket, result) == -1){
! Py_DECREF(result);
! return NULL;
! }
! to->num_seen++;
! lz->num_seen++;
! return result;
! }
! /* This instance is trailing, get data from the queue */
! if (PyList_GET_SIZE(to->outbasket) == 0) {
! /* outbasket is empty, so refill from the inbasket */
! tmp = to->outbasket;
! to->outbasket = to->inbasket;
! to->inbasket = tmp;
! PyList_Reverse(to->outbasket);
! }
! /* Pop result from to->outbasket */
! n = PyList_GET_SIZE(to->outbasket);
! assert(n>0);
! result = PyList_GET_ITEM(to->outbasket, n-1);
! Py_INCREF(result);
! if (PyList_SetSlice(to->outbasket, n-1, n, NULL) == -1) {
! Py_DECREF(result);
! return NULL;
! }
! lz->num_seen++;
! return result;
}
! static void
! ii_dealloc(iiobject *ii)
{
! teeobject *to = ii->tee;
! int n;
! PyObject_GC_UnTrack(ii);
! ii->tee->save_mode = 0; /* Stop saving data */
! if (ii->num_seen < to->num_seen) {
! /* It is the trailing iterator being freed; this means
! that the stored queue data is no longer needed */
! n = PyList_GET_SIZE(to->inbasket);
! PyList_SetSlice(to->inbasket, 0, n, NULL);
! n = PyList_GET_SIZE(to->outbasket);
! PyList_SetSlice(to->outbasket, 0, n, NULL);
}
! Py_XDECREF(ii->tee);
! PyObject_GC_Del(ii);
}
! static int
! ii_traverse(iiobject *ii, visitproc visit, void *arg)
{
! if (ii->tee)
! return visit((PyObject *)(ii->tee), arg);
! return 0;
}
! PyDoc_STRVAR(ii_doc, "Independent iterator created by tee(iterable).");
! static PyTypeObject ii_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
! "itertools.tee_iterator", /* tp_name */
! sizeof(iiobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
! (destructor)ii_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
--- 8,107 ----
*/
! /* tee object and with supporting function and objects ***************/
! /* The teedataobject pre-allocates space for LINKCELLS number of objects.
! To help the object fit neatly inside cache lines (space for 16 to 32
! pointers), the value should be a multiple of 16 minus space for
! the other structure members including PyHEAD overhead. The larger the
! value, the less memory overhead per object and the less time spent
! allocating/deallocating new links. The smaller the number, the less
! wasted space and the more rapid freeing of older data.
*/
+ #define LINKCELLS 57
typedef struct {
PyObject_HEAD
PyObject *it;
! int numread;
! PyObject *nextlink;
! PyObject *(values[LINKCELLS]);
! } teedataobject;
typedef struct {
PyObject_HEAD
! teedataobject *dataobj;
! int index;
! } teeobject;
! static PyTypeObject teedataobject_type;
static PyObject *
! teedataobject_new(PyObject *it)
{
! teedataobject *tdo;
! tdo = PyObject_New(teedataobject, &teedataobject_type);
! if (tdo == NULL)
! return NULL;
! tdo->numread = 0;
! tdo->nextlink = NULL;
! Py_INCREF(it);
! tdo->it = it;
! return (PyObject *)tdo;
! }
! static PyObject *
! teedataobject_jumplink(teedataobject *tdo)
! {
! if (tdo->nextlink == NULL)
! tdo->nextlink = teedataobject_new(tdo->it);
! Py_INCREF(tdo->nextlink);
! return tdo->nextlink;
}
! static PyObject *
! teedataobject_getitem(teedataobject *tdo, int i)
{
! PyObject *value;
! assert(i < LINKCELLS);
! if (i < tdo->numread)
! value = tdo->values[i];
! else {
! /* this is the lead iterator, so fetch more data */
! assert(i == tdo->numread);
! value = PyIter_Next(tdo->it);
! if (value == NULL)
! return NULL;
! tdo->numread++;
! tdo->values[i] = value;
}
! Py_INCREF(value);
! return value;
}
! static void
! teedataobject_dealloc(teedataobject *tdo)
{
! int i;
!
! for (i=0 ; i<tdo->numread ; i++)
! Py_DECREF(tdo->values[i]);
! Py_XDECREF(tdo->it);
! Py_XDECREF(tdo->nextlink);
! PyObject_Del(tdo);
}
! PyDoc_STRVAR(teedataobject_doc, "Data container common to multiple tee objects.");
! static PyTypeObject teedataobject_type = {
PyObject_HEAD_INIT(&PyType_Type)
0, /* ob_size */
! "itertools.tee_dataobject", /* tp_name */
! sizeof(teedataobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
! (destructor)teedataobject_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
***************
*** 138,247 ****
0, /* tp_setattro */
0, /* tp_as_buffer */
! Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
! ii_doc, /* tp_doc */
! (traverseproc)ii_traverse, /* tp_traverse */
! 0, /* tp_clear */
! 0, /* tp_richcompare */
! 0, /* tp_weaklistoffset */
! PyObject_SelfIter, /* tp_iter */
! (iternextfunc)ii_next, /* tp_iternext */
! 0, /* tp_methods */
};
- /* tee object **********************************************************/
static PyTypeObject tee_type;
static PyObject *
! tee_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
! PyObject *it = NULL;
! PyObject *iterable;
! PyObject *inbasket = NULL, *outbasket = NULL, *result = NULL;
! teeobject *to = NULL;
! int i;
! if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
return NULL;
! it = PyObject_GetIter(iterable);
! if (it == NULL) goto fail;
!
! inbasket = PyList_New(0);
! if (inbasket == NULL) goto fail;
! outbasket = PyList_New(0);
! if (outbasket == NULL) goto fail;
! to = PyObject_GC_New(teeobject, &tee_type);
! if (to == NULL) goto fail;
! to->it = it;
! to->inbasket = inbasket;
! to->outbasket = outbasket;
! to->save_mode = 1;
! to->num_seen = 0;
! PyObject_GC_Track(to);
! /* create independent iterators */
! result = PyTuple_New(2);
! if (result == NULL) goto fail;
! for (i=0 ; i<2 ; i++) {
! iiobject *indep_it = PyObject_GC_New(iiobject, &ii_type);
! if (indep_it == NULL) goto fail;
! Py_INCREF(to);
! indep_it->tee = to;
! indep_it->num_seen = 0;
! PyObject_GC_Track(indep_it);
! PyTuple_SET_ITEM(result, i, (PyObject *)indep_it);
}
! goto succeed;
! fail:
Py_XDECREF(it);
! Py_XDECREF(inbasket);
! Py_XDECREF(outbasket);
! Py_XDECREF(result);
! succeed:
! Py_XDECREF(to);
! return result;
}
! static void
! tee_dealloc(teeobject *to)
{
! PyObject_GC_UnTrack(to);
! Py_XDECREF(to->inbasket);
! Py_XDECREF(to->outbasket);
! Py_XDECREF(to->it);
! PyObject_GC_Del(to);
}
! static int
! tee_traverse(teeobject *to, visitproc visit, void *arg)
{
! int err;
!
! if (to->it) {
! err = visit(to->it, arg);
! if (err)
! return err;
! }
! if (to->inbasket) {
! err = visit(to->inbasket, arg);
! if (err)
! return err;
! }
! if (to->outbasket) {
! err = visit(to->outbasket, arg);
! if (err)
! return err;
! }
! return 0;
}
! PyDoc_STRVAR(tee_doc,
! "tee(iterable) --> (it1, it2)\n\
! \n\
! Split the iterable into two independent iterables.");
static PyTypeObject tee_type = {
--- 118,211 ----
0, /* tp_setattro */
0, /* tp_as_buffer */
! Py_TPFLAGS_DEFAULT, /* tp_flags */
! teedataobject_doc, /* tp_doc */
! 0, /* tp_traverse */
};
static PyTypeObject tee_type;
static PyObject *
! tee_next(teeobject *to)
{
! PyObject *value, *link;
! if (to->index >= LINKCELLS) {
! link = teedataobject_jumplink(to->dataobj);
! Py_XDECREF(to->dataobj);
! to->dataobj = (teedataobject *)link;
! to->index = 0;
! }
! value = teedataobject_getitem(to->dataobj, to->index);
! if (value == NULL)
return NULL;
+ to->index++;
+ return value;
+ }
! static PyObject *
! tee_copy(teeobject *to)
! {
! teeobject *newto;
! newto = PyObject_New(teeobject, &tee_type);
! if (newto == NULL)
! return NULL;
! Py_INCREF(to->dataobj);
! newto->dataobj = to->dataobj;
! newto->index = to->index;
! return (PyObject *)newto;
! }
! PyDoc_STRVAR(teecopy_doc, "Returns an independent iterator.");
! static PyObject *
! tee_fromiterable(PyObject *iterable)
! {
! teeobject *to;
! PyObject *it = NULL;
! it = PyObject_GetIter(iterable);
! if (it == NULL)
! return NULL;
! if (PyObject_TypeCheck(it, &tee_type)) {
! to = (teeobject *)tee_copy((teeobject *)it);
! goto done;
}
!
! to = PyObject_New(teeobject, &tee_type);
! if (to == NULL)
! goto done;
! to->dataobj = (teedataobject *)teedataobject_new(it);
! to->index = 0;
! done:
Py_XDECREF(it);
! return (PyObject *)to;
}
! static PyObject *
! tee_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
! PyObject *iterable;
!
! if (!PyArg_UnpackTuple(args, "tee", 1, 1, &iterable))
! return NULL;
! return tee_fromiterable(iterable);
}
! static void
! tee_dealloc(teeobject *to)
{
! Py_XDECREF(to->dataobj);
! PyObject_Del(to);
}
! PyDoc_STRVAR(teeobject_doc,
! "Iterator wrapped to make it copyable");
!
! static PyMethodDef tee_methods[] = {
! {"__copy__", (PyCFunction)tee_copy, METH_NOARGS, teecopy_doc},
! {NULL, NULL} /* sentinel */
! };
static PyTypeObject tee_type = {
***************
*** 267,279 ****
0, /* tp_setattro */
0, /* tp_as_buffer */
! Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
! tee_doc, /* tp_doc */
! (traverseproc)tee_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
! 0, /* tp_iter */
! 0, /* tp_iternext */
! 0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
--- 231,243 ----
0, /* tp_setattro */
0, /* tp_as_buffer */
! Py_TPFLAGS_DEFAULT, /* tp_flags */
! teeobject_doc, /* tp_doc */
! 0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
! PyObject_SelfIter, /* tp_iter */
! (iternextfunc)tee_next, /* tp_iternext */
! tee_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
***************
*** 286,291 ****
--- 250,299 ----
0, /* tp_alloc */
tee_new, /* tp_new */
+ PyObject_Del, /* tp_free */
};
+ static PyObject *
+ tee(PyObject *self, PyObject *args)
+ {
+ int i, n=2;
+ PyObject *it, *iterable, *copyable, *result;
+
+ if (!PyArg_ParseTuple(args, "O|i", &iterable, &n))
+ return NULL;
+ result = PyTuple_New(n);
+ if (result == NULL)
+ return NULL;
+ if (n == 0)
+ return result;
+ it = PyObject_GetIter(iterable);
+ if (it == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ if (!PyObject_HasAttrString(it, "__copy__")) {
+ copyable = tee_fromiterable(it);
+ Py_DECREF(it);
+ if (copyable == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ } else
+ copyable = it;
+ PyTuple_SET_ITEM(result, 0, copyable);
+ for (i=1 ; i<n ; i++) {
+ copyable = PyObject_CallMethod(copyable, "__copy__", NULL);
+ if (copyable == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ PyTuple_SET_ITEM(result, i, copyable);
+ }
+ return result;
+ }
+
+ PyDoc_STRVAR(tee_doc,
+ "tee(iterable, n=2) --> tuple of n independent iterators.");
+
+
/* cycle object **********************************************************/
***************
*** 2092,2096 ****
imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\
starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
! tee(it) --> (it1, it2) splits one iterator into two \n\
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
--- 2100,2104 ----
imap(fun, p, q, ...) --> fun(p0, q0), fun(p1, q1), ...\n\
starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
! tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
***************
*** 2099,2102 ****
--- 2107,2115 ----
+ static PyMethodDef module_methods[] = {
+ {"tee", (PyCFunction)tee, METH_VARARGS, tee_doc},
+ {NULL, NULL} /* sentinel */
+ };
+
PyMODINIT_FUNC
inititertools(void)
***************
*** 2106,2110 ****
char *name;
PyTypeObject *typelist[] = {
- &tee_type,
&cycle_type,
&dropwhile_type,
--- 2119,2122 ----
***************
*** 2122,2126 ****
};
! m = Py_InitModule3("itertools", NULL, module_doc);
for (i=0 ; typelist[i] != NULL ; i++) {
--- 2134,2138 ----
};
! m = Py_InitModule3("itertools", module_methods, module_doc);
for (i=0 ; typelist[i] != NULL ; i++) {
***************
*** 2132,2134 ****
--- 2144,2152 ----
PyModule_AddObject(m, name+1, (PyObject *)typelist[i]);
}
+
+ if (PyType_Ready(&teedataobject_type) < 0)
+ return;
+ if (PyType_Ready(&tee_type) < 0)
+ return;
+
}
More information about the Python-checkins
mailing list