[Python-checkins] python/nondist/sandbox/itertools itertools.c,1.9,1.10 libitertools.tex,1.9,1.10 test_itertools.py,1.6,1.7 todo.txt,1.9,1.10
rhettinger@users.sourceforge.net
rhettinger@users.sourceforge.net
Mon, 27 Jan 2003 15:33:28 -0800
Update of /cvsroot/python/python/nondist/sandbox/itertools
In directory sc8-pr-cvs1:/tmp/cvs-serv27314
Modified Files:
itertools.c libitertools.tex test_itertools.py todo.txt
Log Message:
Skip pointed out that loopzip() had unsavory qualities.
Replaced it with a safer, cleaner izip().
Added doctest for examples in the library reference.
Index: itertools.c
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/itertools.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** itertools.c 27 Jan 2003 15:22:59 -0000 1.9
--- itertools.c 27 Jan 2003 23:33:26 -0000 1.10
***************
*** 1082,1086 ****
! /* loopzip object ************************************************************/
#include "Python.h"
--- 1082,1086 ----
! /* izip object ************************************************************/
#include "Python.h"
***************
*** 1090,1102 ****
long tuplesize;
PyObject *ittuple; /* tuple of iterators */
! PyObject *result;
! } loopzipobject;
! PyTypeObject loopzip_type;
static PyObject *
! loopzip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
! loopzipobject *lz;
int i;
PyObject *ittuple; /* tuple of iterators */
--- 1090,1101 ----
long tuplesize;
PyObject *ittuple; /* tuple of iterators */
! } izipobject;
! PyTypeObject izip_type;
static PyObject *
! izip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
! izipobject *lz;
int i;
PyObject *ittuple; /* tuple of iterators */
***************
*** 1105,1109 ****
if (tuplesize < 1) {
PyErr_SetString(PyExc_TypeError,
! "loopzip() requires at least one sequence");
return NULL;
}
--- 1104,1108 ----
if (tuplesize < 1) {
PyErr_SetString(PyExc_TypeError,
! "izip() requires at least one sequence");
return NULL;
}
***************
*** 1116,1120 ****
if(ittuple == NULL)
return NULL;
! for (i = 0; i < tuplesize; ++i) {
PyObject *item = PyTuple_GET_ITEM(args, i);
PyObject *it = PyObject_GetIter(item);
--- 1115,1119 ----
if(ittuple == NULL)
return NULL;
! for (i=0; i < tuplesize; ++i) {
PyObject *item = PyTuple_GET_ITEM(args, i);
PyObject *it = PyObject_GetIter(item);
***************
*** 1122,1126 ****
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
! "loopzip argument #%d must support iteration",
i+1);
Py_DECREF(ittuple);
--- 1121,1125 ----
if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
! "izip argument #%d must support iteration",
i+1);
Py_DECREF(ittuple);
***************
*** 1130,1135 ****
}
! /* create loopzipobject structure */
! lz = (loopzipobject *)type->tp_alloc(type, 0);
if (lz == NULL) {
Py_DECREF(ittuple);
--- 1129,1134 ----
}
! /* create izipobject structure */
! lz = (izipobject *)type->tp_alloc(type, 0);
if (lz == NULL) {
Py_DECREF(ittuple);
***************
*** 1139,1168 ****
lz->tuplesize = tuplesize;
- /* create result holder */
- lz->result = PyList_New(tuplesize);
- if (lz->result == NULL) {
- Py_DECREF(ittuple);
- Py_DECREF(lz);
- return NULL;
- }
- for (i=0 ; i < tuplesize ; i++) {
- Py_INCREF(Py_None);
- PyList_SET_ITEM(lz->result, i, Py_None);
- }
-
return (PyObject *)lz;
}
static void
! loopzip_dealloc(loopzipobject *lz)
{
PyObject_GC_UnTrack(lz);
Py_XDECREF(lz->ittuple);
- Py_XDECREF(lz->result);
lz->ob_type->tp_free(lz);
}
static int
! loopzip_traverse(loopzipobject *lz, visitproc visit, void *arg)
{
if (lz->ittuple)
--- 1138,1154 ----
lz->tuplesize = tuplesize;
return (PyObject *)lz;
}
static void
! izip_dealloc(izipobject *lz)
{
PyObject_GC_UnTrack(lz);
Py_XDECREF(lz->ittuple);
lz->ob_type->tp_free(lz);
}
static int
! izip_traverse(izipobject *lz, visitproc visit, void *arg)
{
if (lz->ittuple)
***************
*** 1172,1192 ****
static PyObject *
! loopzip_next(loopzipobject *lz)
{
int i;
long tuplesize = lz->tuplesize;
! PyObject *result = lz->result;
PyObject *it;
PyObject *item;
! /* XXX: Add check that resultsize == tuplesize */
for (i=0 ; i < tuplesize ; i++) {
- item = PyList_GET_ITEM(result, i);
- Py_DECREF(item);
it = PyTuple_GET_ITEM(lz->ittuple, i);
item = PyIter_Next(it);
if (item == NULL)
return NULL;
! PyList_SET_ITEM(result, i, item);
}
Py_INCREF(result);
--- 1158,1179 ----
static PyObject *
! izip_next(izipobject *lz)
{
int i;
long tuplesize = lz->tuplesize;
! PyObject *result;
PyObject *it;
PyObject *item;
! result = PyTuple_New(tuplesize);
! if (result == NULL)
! return NULL;
!
for (i=0 ; i < tuplesize ; i++) {
it = PyTuple_GET_ITEM(lz->ittuple, i);
item = PyIter_Next(it);
if (item == NULL)
return NULL;
! PyTuple_SET_ITEM(result, i, item);
}
Py_INCREF(result);
***************
*** 1195,1199 ****
static PyObject *
! loopzip_getiter(PyObject *lz)
{
Py_INCREF(lz);
--- 1182,1186 ----
static PyObject *
! izip_getiter(PyObject *lz)
{
Py_INCREF(lz);
***************
*** 1201,1225 ****
}
! PyDoc_STRVAR(loopzip_doc,
! "loopzip(iter1 [,iter2 [...]]) --> loopzip object\n\
\n\
! Return a loopzip object whose .next() method returns a list where\n\
the i-th element comes from the i-th iterable argument. The .next()\n\
! method updates the returns the same list everytime until the shortest\n\
! iterable in the argument sequence is exhausted and then it raises\n\
! StopIteration. Works like the zip() function but consumes less memory.\n\
! Unlike zip, it returns an iterator and the n-th return is a list rather\n\
! than a tuple. It is appropriate for use in loops, but not for conversion\n\
! to a list. For example: list(loopzip('abc')) returns a list of three\n\
! identical sublists which is usually not what was intended.");
! PyTypeObject loopzip_type = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
! "itertools.loopzip", /* tp_name */
! sizeof(loopzipobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
! (destructor)loopzip_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
--- 1188,1209 ----
}
! PyDoc_STRVAR(izip_doc,
! "izip(iter1 [,iter2 [...]]) --> izip object\n\
\n\
! Return a izip object whose .next() method returns a tuple where\n\
the i-th element comes from the i-th iterable argument. The .next()\n\
! method continues until the shortest iterable in the argument sequence\n\
! is exhausted and then it raises StopIteration. Works like the zip()\n
! function but consumes less memory by returning an iterator instead of\n\
! a list.");
! PyTypeObject izip_type = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
! "itertools.izip", /* tp_name */
! sizeof(izipobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
! (destructor)izip_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
***************
*** 1238,1248 ****
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE, /* tp_flags */
! loopzip_doc, /* tp_doc */
! (traverseproc)loopzip_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
! (getiterfunc)loopzip_getiter, /* tp_iter */
! (iternextfunc)loopzip_next, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
--- 1222,1232 ----
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE, /* tp_flags */
! izip_doc, /* tp_doc */
! (traverseproc)izip_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
! (getiterfunc)izip_getiter, /* tp_iter */
! (iternextfunc)izip_next, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
***************
*** 1255,1259 ****
0, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
! loopzip_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
--- 1239,1243 ----
0, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
! izip_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
***************
*** 1375,1380 ****
\n\
Iterators terminating on the shortest input sequence:\n\
! loopzip(p, q, ...) --> [p[0], q[0]], [p[1], q[1]], ... \n\
! same list each time but with updated contents\n\
ifilter(pred, seq, invert=False) --> elements of seq where\n\
pred(elem) is True (or False if invert is set)\n\
--- 1359,1363 ----
\n\
Iterators terminating on the shortest input sequence:\n\
! izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
ifilter(pred, seq, invert=False) --> elements of seq where\n\
pred(elem) is True (or False if invert is set)\n\
***************
*** 1435,1442 ****
PyModule_AddObject(m, "count", (PyObject *)&count_type);
! if (PyType_Ready(&loopzip_type) < 0)
return;
! Py_INCREF(&loopzip_type);
! PyModule_AddObject(m, "loopzip", (PyObject *)&loopzip_type);
if (PyType_Ready(&repeat_type) < 0)
--- 1418,1425 ----
PyModule_AddObject(m, "count", (PyObject *)&count_type);
! if (PyType_Ready(&izip_type) < 0)
return;
! Py_INCREF(&izip_type);
! PyModule_AddObject(m, "izip", (PyObject *)&izip_type);
if (PyType_Ready(&repeat_type) < 0)
Index: libitertools.tex
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/libitertools.tex,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** libitertools.tex 27 Jan 2003 15:42:54 -0000 1.9
--- libitertools.tex 27 Jan 2003 23:33:26 -0000 1.10
***************
*** 38,62 ****
penalty.
- \item Wherever straight-forward alternatives exist, the corresponding
- tools in this module seek to meet a different need and are designed
- for speed. In fact, the \emph{sole} justification for this module
- being written in C is its speed advantage.
-
- For instance, the
- \module{__builtins__} module has an easy-to-use, no surprises version
- of \function(zip()). This module's corresponding function,
- \function{loopzip()} returns an iterator rather than a full list.
- Also, calls to the iterator return a mutable list rather than a tuple
- and it returns the \emph{same} list on each pass. Used in a
- \keyword{for} loop, \function{loopzip()} can be directly substituted
- for \function{zip()} and run much faster. It has nearly zero
- overhead since the looping is done in C code (bypassing Python's eval
- loop); since it returns an iterator (saving the need to allocate a
- list and append to it an element at a time); and since it reuses just
- one output list (saving the time to allocate and build a tuple on
- every pass). Though very fast, using \function{loopzip()} outside of
- a \keyword{for} loop or other itertool can result in surprising
- behavior and an unwelcome refresher lesson in mutability.
-
\item Another source of value comes from standardizing a core set of tools
to avoid the readability and reliability problems that arise when many
--- 38,41 ----
***************
*** 178,199 ****
\end{funcdesc}
! \begin{funcdesc}{loopzip}{*iterables}
Make an iterator that aggregates elements from each of the iterables.
Like \function{zip()} except that it returns an iterator instead of
! a list and the individual elements are stored in a list rather than
! in a tuple. The \emph{same} list is used for each pass and only the
! contents are updated; hence, \function{loopzip()} is only appropriate
! in a \keyword{for} loop or other itertool. The iterator terminates
! with \exception{StopIteration} when the first of the iterables is
! exhausted. Equivalent to:
\begin{verbatim}
! def loopzip(*iterables):
! iterables = map(iter, iterables)
! result = [None] * len(iterables)
! while True:
! for i in xrange(len(iterables)):
! result[i] = iterables[i].next()
! yield result
\end{verbatim}
\end{funcdesc}
--- 157,171 ----
\end{funcdesc}
! \begin{funcdesc}{izip}{*iterables}
Make an iterator that aggregates elements from each of the iterables.
Like \function{zip()} except that it returns an iterator instead of
! a list. Equivalent to:
\begin{verbatim}
! def izip(*iterables):
! iterables = map(iter, iterables)
! while True:
! result = [i.next() for i in iterables]
! yield tuple(result)
\end{verbatim}
\end{funcdesc}
***************
*** 266,270 ****
Hello
! >>> for checknum, amount in loopzip(count(1200), amounts):
... print 'Check %d is for $%.2f' % (checknum, amount)
...
--- 238,242 ----
Hello
! >>> for checknum, amount in izip(count(1200), amounts):
... print 'Check %d is for $%.2f' % (checknum, amount)
...
***************
*** 273,276 ****
--- 245,249 ----
Check 1202 is for $823.14
+ >>> import operator
>>> bases = [2, 3, 5, 7]
>>> powers = [2, 3, 4]
***************
*** 278,281 ****
--- 251,258 ----
... print list(imap(operator.pow, bases, repeat(power)))
...
+ [4, 9, 25, 49]
+ [8, 27, 125, 343]
+ [16, 81, 625, 2401]
+
\end{verbatim}
Index: test_itertools.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/test_itertools.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** test_itertools.py 27 Jan 2003 12:16:42 -0000 1.6
--- test_itertools.py 27 Jan 2003 23:33:26 -0000 1.7
***************
*** 19,27 ****
self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
! def test_loopzip(self):
! ans = [(x,y) for x, y in loopzip('abc',count())]
self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
! self.assertEqual(list(loopzip('abc',count())), [['c', 2]] * 3)
! self.assertRaises(TypeError, loopzip)
def test_repeat(self):
--- 19,26 ----
self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
! def test_izip(self):
! ans = [(x,y) for x, y in izip('abc',count())]
self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
! self.assertRaises(TypeError, izip)
def test_repeat(self):
***************
*** 80,84 ****
self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
! def test_main():
suite = unittest.TestSuite()
for testclass in (TestBasicOps,
--- 79,126 ----
self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
! libreftest = """ Doctest for examples in the library reference, libitertools.tex
!
! >>> for i in times(3):
! ... print "Hello"
! ...
! Hello
! Hello
! Hello
!
! >>> amounts = [120.15, 764.05, 823.14]
! >>> for checknum, amount in izip(count(1200), amounts):
! ... print 'Check %d is for $%.2f' % (checknum, amount)
! ...
! Check 1200 is for $120.15
! Check 1201 is for $764.05
! Check 1202 is for $823.14
!
! >>> import operator
! >>> bases = [2, 3, 5, 7]
! >>> powers = [2, 3, 4]
! >>> for power in powers:
! ... print list(imap(operator.pow, bases, repeat(power)))
! ...
! [4, 9, 25, 49]
! [8, 27, 125, 343]
! [16, 81, 625, 2401]
!
!
! >>> def enumerate(s):
! ... return izip(count(), s)
! >>> def tabulate(f):
! ... return imap(f, count())
! >>> def iteritems(d):
! ... return izip(d.iterkeys(), d.itervalues())
! >>> def nth(s, n):
! ... return islice(n, n+1).next()
!
!
! """
!
! __test__ = {'libreftest' : libreftest}
!
! def test_main(verbose=None):
! import test_itertools
suite = unittest.TestSuite()
for testclass in (TestBasicOps,
***************
*** 86,90 ****
suite.addTest(unittest.makeSuite(testclass))
test_support.run_suite(suite)
if __name__ == "__main__":
! test_main()
--- 128,133 ----
suite.addTest(unittest.makeSuite(testclass))
test_support.run_suite(suite)
+ test_support.run_doctest(test_itertools, verbose)
if __name__ == "__main__":
! test_main(verbose=True)
Index: todo.txt
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/todo.txt,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** todo.txt 27 Jan 2003 12:16:42 -0000 1.9
--- todo.txt 27 Jan 2003 23:33:26 -0000 1.10
***************
*** 1,2 ****
--- 1,10 ----
+ Comments from Skip and Jack:
+ func=None in map
+ provide in-line motivating examples
+ ? add default arg to times()
+
+ Doctest:
+ make islice() python code perform the same as the C code
+
Add:
iapply(func) ?? what did this do in SML
***************
*** 12,16 ****
Things dropped because they bug me:
cycle(seqn) requires auxilliary storage (which is surprising
! behavior for iterators). This is best left for pure python.
Things that just bug me:
--- 20,26 ----
Things dropped because they bug me:
cycle(seqn) requires auxilliary storage (which is surprising
! behavior for iterators). This is best left for pure python.
! loopzip(s1, s2, ...) returned mutable lists which could be
! prone to creating hard-to-find errors.
Things that just bug me: