[Python-checkins] python/nondist/sandbox/itertools itertools.c,1.9,1.10 libitertools.tex,1.9,1.10 test_itertools.py,1.6,1.7 todo.txt,1.9,1.10

rhettinger@users.sourceforge.net rhettinger@users.sourceforge.net
Mon, 27 Jan 2003 15:33:28 -0800


Update of /cvsroot/python/python/nondist/sandbox/itertools
In directory sc8-pr-cvs1:/tmp/cvs-serv27314

Modified Files:
	itertools.c libitertools.tex test_itertools.py todo.txt 
Log Message:
Skip pointed out that loopzip() had unsavory qualities.
Replaced it with a safer, cleaner izip().

Added doctest for examples in the library reference.



Index: itertools.c
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/itertools.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** itertools.c	27 Jan 2003 15:22:59 -0000	1.9
--- itertools.c	27 Jan 2003 23:33:26 -0000	1.10
***************
*** 1082,1086 ****
  
  
! /* loopzip object ************************************************************/
  
  #include "Python.h"
--- 1082,1086 ----
  
  
! /* izip object ************************************************************/
  
  #include "Python.h"
***************
*** 1090,1102 ****
  	long	tuplesize;
  	PyObject *ittuple;		/* tuple of iterators */
! 	PyObject *result;
! } loopzipobject;
  
! PyTypeObject loopzip_type;
  
  static PyObject *
! loopzip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  {
! 	loopzipobject *lz;
  	int i;
  	PyObject *ittuple;  /* tuple of iterators */
--- 1090,1101 ----
  	long	tuplesize;
  	PyObject *ittuple;		/* tuple of iterators */
! } izipobject;
  
! PyTypeObject izip_type;
  
  static PyObject *
! izip_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  {
! 	izipobject *lz;
  	int i;
  	PyObject *ittuple;  /* tuple of iterators */
***************
*** 1105,1109 ****
  	if (tuplesize < 1) {
  		PyErr_SetString(PyExc_TypeError,
! 				"loopzip() requires at least one sequence");
  		return NULL;
  	}
--- 1104,1108 ----
  	if (tuplesize < 1) {
  		PyErr_SetString(PyExc_TypeError,
! 				"izip() requires at least one sequence");
  		return NULL;
  	}
***************
*** 1116,1120 ****
  	if(ittuple == NULL)
  		return NULL;
! 	for (i = 0; i < tuplesize; ++i) {
  		PyObject *item = PyTuple_GET_ITEM(args, i);
  		PyObject *it = PyObject_GetIter(item);
--- 1115,1119 ----
  	if(ittuple == NULL)
  		return NULL;
! 	for (i=0; i < tuplesize; ++i) {
  		PyObject *item = PyTuple_GET_ITEM(args, i);
  		PyObject *it = PyObject_GetIter(item);
***************
*** 1122,1126 ****
  			if (PyErr_ExceptionMatches(PyExc_TypeError))
  				PyErr_Format(PyExc_TypeError,
! 				    "loopzip argument #%d must support iteration",
  				    i+1);
  			Py_DECREF(ittuple);
--- 1121,1125 ----
  			if (PyErr_ExceptionMatches(PyExc_TypeError))
  				PyErr_Format(PyExc_TypeError,
! 				    "izip argument #%d must support iteration",
  				    i+1);
  			Py_DECREF(ittuple);
***************
*** 1130,1135 ****
  	}
  
! 	/* create loopzipobject structure */
! 	lz = (loopzipobject *)type->tp_alloc(type, 0);
  	if (lz == NULL) {
  		Py_DECREF(ittuple);
--- 1129,1134 ----
  	}
  
! 	/* create izipobject structure */
! 	lz = (izipobject *)type->tp_alloc(type, 0);
  	if (lz == NULL) {
  		Py_DECREF(ittuple);
***************
*** 1139,1168 ****
  	lz->tuplesize = tuplesize;
  
- 	/* create result holder */
- 	lz->result = PyList_New(tuplesize);
- 	if (lz->result == NULL) {
- 		Py_DECREF(ittuple);
- 		Py_DECREF(lz);
- 		return NULL;
- 	}
- 	for (i=0 ; i < tuplesize ; i++) {
- 		Py_INCREF(Py_None);
- 		PyList_SET_ITEM(lz->result, i, Py_None);
- 	}
- 
  	return (PyObject *)lz;
  }
  
  static void
! loopzip_dealloc(loopzipobject *lz)
  {
  	PyObject_GC_UnTrack(lz);
  	Py_XDECREF(lz->ittuple);
- 	Py_XDECREF(lz->result);
  	lz->ob_type->tp_free(lz);
  }
  
  static int
! loopzip_traverse(loopzipobject *lz, visitproc visit, void *arg)
  {
  	if (lz->ittuple)
--- 1138,1154 ----
  	lz->tuplesize = tuplesize;
  
  	return (PyObject *)lz;
  }
  
  static void
! izip_dealloc(izipobject *lz)
  {
  	PyObject_GC_UnTrack(lz);
  	Py_XDECREF(lz->ittuple);
  	lz->ob_type->tp_free(lz);
  }
  
  static int
! izip_traverse(izipobject *lz, visitproc visit, void *arg)
  {
  	if (lz->ittuple)
***************
*** 1172,1192 ****
  
  static PyObject *
! loopzip_next(loopzipobject *lz)
  {
  	int i;
  	long tuplesize = lz->tuplesize;
! 	PyObject *result = lz->result;
  	PyObject *it;
  	PyObject *item;
  
! 	/* XXX: Add check that resultsize == tuplesize */
  	for (i=0 ; i < tuplesize ; i++) {
- 		item = PyList_GET_ITEM(result, i);
- 		Py_DECREF(item);
  		it = PyTuple_GET_ITEM(lz->ittuple, i);
  		item = PyIter_Next(it);
  		if (item == NULL)
  			return NULL;
! 		PyList_SET_ITEM(result, i, item);
  	}
  	Py_INCREF(result);
--- 1158,1179 ----
  
  static PyObject *
! izip_next(izipobject *lz)
  {
  	int i;
  	long tuplesize = lz->tuplesize;
! 	PyObject *result;
  	PyObject *it;
  	PyObject *item;
  
! 	result = PyTuple_New(tuplesize);
! 	if (result == NULL)
! 		return NULL;
! 
  	for (i=0 ; i < tuplesize ; i++) {
  		it = PyTuple_GET_ITEM(lz->ittuple, i);
  		item = PyIter_Next(it);
  		if (item == NULL)
  			return NULL;
! 		PyTuple_SET_ITEM(result, i, item);
  	}
  	Py_INCREF(result);
***************
*** 1195,1199 ****
  
  static PyObject *
! loopzip_getiter(PyObject *lz)
  {
  	Py_INCREF(lz);
--- 1182,1186 ----
  
  static PyObject *
! izip_getiter(PyObject *lz)
  {
  	Py_INCREF(lz);
***************
*** 1201,1225 ****
  }
  
! PyDoc_STRVAR(loopzip_doc,
! "loopzip(iter1 [,iter2 [...]]) --> loopzip object\n\
  \n\
! Return a loopzip object whose .next() method returns a list where\n\
  the i-th element comes from the i-th iterable argument.  The .next()\n\
! method updates the returns the same list everytime until the shortest\n\
! iterable in the argument sequence is exhausted and then it raises\n\
! StopIteration.  Works like the zip() function but consumes less memory.\n\
! Unlike zip, it returns an iterator and the n-th return is a list rather\n\
! than a tuple.  It is appropriate for use in loops, but not for conversion\n\
! to a list.  For example:  list(loopzip('abc')) returns a list of three\n\
! identical sublists which is usually not what was intended.");
  
! PyTypeObject loopzip_type = {
  	PyObject_HEAD_INIT(NULL)
  	0,				/* ob_size */
! 	"itertools.loopzip",		 /* tp_name */
! 	sizeof(loopzipobject),		 /* tp_basicsize */
  	0,				/* tp_itemsize */
  	/* methods */
! 	(destructor)loopzip_dealloc,	   /* tp_dealloc */
  	0,				/* tp_print */
  	0,				/* tp_getattr */
--- 1188,1209 ----
  }
  
! PyDoc_STRVAR(izip_doc,
! "izip(iter1 [,iter2 [...]]) --> izip object\n\
  \n\
! Return a izip object whose .next() method returns a tuple where\n\
  the i-th element comes from the i-th iterable argument.  The .next()\n\
! method continues until the shortest iterable in the argument sequence\n\
! is exhausted and then it raises StopIteration.  Works like the zip()\n\
! function but consumes less memory by returning an iterator instead of\n\
! a list.");
  
! PyTypeObject izip_type = {
  	PyObject_HEAD_INIT(NULL)
  	0,				/* ob_size */
! 	"itertools.izip",		 /* tp_name */
! 	sizeof(izipobject),		 /* tp_basicsize */
  	0,				/* tp_itemsize */
  	/* methods */
! 	(destructor)izip_dealloc,	   /* tp_dealloc */
  	0,				/* tp_print */
  	0,				/* tp_getattr */
***************
*** 1238,1248 ****
  	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
  		Py_TPFLAGS_BASETYPE,	/* tp_flags */
! 	loopzip_doc,			   /* tp_doc */
! 	(traverseproc)loopzip_traverse,    /* tp_traverse */
  	0,				/* tp_clear */
  	0,				/* tp_richcompare */
  	0,				/* tp_weaklistoffset */
! 	(getiterfunc)loopzip_getiter,	   /* tp_iter */
! 	(iternextfunc)loopzip_next,	   /* tp_iternext */
  	0,				/* tp_methods */
  	0,				/* tp_members */
--- 1222,1232 ----
  	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
  		Py_TPFLAGS_BASETYPE,	/* tp_flags */
! 	izip_doc,			   /* tp_doc */
! 	(traverseproc)izip_traverse,    /* tp_traverse */
  	0,				/* tp_clear */
  	0,				/* tp_richcompare */
  	0,				/* tp_weaklistoffset */
! 	(getiterfunc)izip_getiter,	   /* tp_iter */
! 	(iternextfunc)izip_next,	   /* tp_iternext */
  	0,				/* tp_methods */
  	0,				/* tp_members */
***************
*** 1255,1259 ****
  	0,				/* tp_init */
  	PyType_GenericAlloc,		/* tp_alloc */
! 	loopzip_new,			 /* tp_new */
  	PyObject_GC_Del,		/* tp_free */
  };
--- 1239,1243 ----
  	0,				/* tp_init */
  	PyType_GenericAlloc,		/* tp_alloc */
! 	izip_new,			 /* tp_new */
  	PyObject_GC_Del,		/* tp_free */
  };
***************
*** 1375,1380 ****
  \n\
  Iterators terminating on the shortest input sequence:\n\
! loopzip(p, q, ...) --> [p[0], q[0]], [p[1], q[1]], ... \n\
!        same list each time but with updated contents\n\
  ifilter(pred, seq, invert=False) --> elements of seq where\n\
         pred(elem) is True (or False if invert is set)\n\
--- 1359,1363 ----
  \n\
  Iterators terminating on the shortest input sequence:\n\
! izip(p, q, ...) --> (p[0], q[0]), (p[1], q[1]), ... \n\
  ifilter(pred, seq, invert=False) --> elements of seq where\n\
         pred(elem) is True (or False if invert is set)\n\
***************
*** 1435,1442 ****
  	PyModule_AddObject(m, "count", (PyObject *)&count_type);
  
! 	if (PyType_Ready(&loopzip_type) < 0)
  		return;
! 	Py_INCREF(&loopzip_type);
! 	PyModule_AddObject(m, "loopzip", (PyObject *)&loopzip_type);
  
  	if (PyType_Ready(&repeat_type) < 0)
--- 1418,1425 ----
  	PyModule_AddObject(m, "count", (PyObject *)&count_type);
  
! 	if (PyType_Ready(&izip_type) < 0)
  		return;
! 	Py_INCREF(&izip_type);
! 	PyModule_AddObject(m, "izip", (PyObject *)&izip_type);
  
  	if (PyType_Ready(&repeat_type) < 0)

Index: libitertools.tex
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/libitertools.tex,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** libitertools.tex	27 Jan 2003 15:42:54 -0000	1.9
--- libitertools.tex	27 Jan 2003 23:33:26 -0000	1.10
***************
*** 38,62 ****
          penalty.
  
-     \item Wherever straight-forward alternatives exist, the corresponding
-         tools in this module seek to meet a different need and are designed
-         for speed.  In fact, the \emph{sole} justification for this module
-         being written in C is its speed advantage.
- 
-         For instance, the
-         \module{__builtins__} module has an easy-to-use, no surprises version
-         of \function(zip()).  This module's corresponding function,
-         \function{loopzip()} returns an iterator rather than a full list.
-         Also, calls to the iterator return a mutable list rather than a tuple
-         and it returns the \emph{same} list on each pass.  Used in a
-         \keyword{for} loop, \function{loopzip()} can be directly substituted
-         for \function{zip()} and run much faster.  It has nearly zero
-         overhead since the looping is done in C code (bypassing Python's eval
-         loop); since it returns an iterator (saving the need to allocate a
-         list and append to it an element at a time); and since it reuses just
-         one output list (saving the time to allocate and build a tuple on
-         every pass).  Though very fast, using \function{loopzip()} outside of
-         a \keyword{for} loop or other itertool can result in surprising
-         behavior and an unwelcome refresher lesson in mutability.
- 
      \item Another source of value comes from standardizing a core set of tools
          to avoid the readability and reliability problems that arise when many
--- 38,41 ----
***************
*** 178,199 ****
  \end{funcdesc}
  
! \begin{funcdesc}{loopzip}{*iterables}
    Make an iterator that aggregates elements from each of the iterables.
    Like \function{zip()} except that it returns an iterator instead of
!   a list and the individual elements are stored in a list rather than
!   in a tuple.  The \emph{same} list is used for each pass and only the
!   contents are updated; hence, \function{loopzip()} is only appropriate
!   in a \keyword{for} loop or other itertool.  The iterator terminates
!   with \exception{StopIteration} when the first of the iterables is
!   exhausted.  Equivalent to:
  
    \begin{verbatim}
!      def loopzip(*iterables):
!          iterables = map(iter, iterables)
!          result = [None] * len(iterables)
!          while True:
!              for i in xrange(len(iterables)):
!                  result[i] = iterables[i].next()
!              yield result
    \end{verbatim}
  \end{funcdesc}
--- 157,171 ----
  \end{funcdesc}
  
! \begin{funcdesc}{izip}{*iterables}
    Make an iterator that aggregates elements from each of the iterables.
    Like \function{zip()} except that it returns an iterator instead of
!   a list.  Equivalent to:
  
    \begin{verbatim}
!      def izip(*iterables):
! 	 iterables = map(iter, iterables)
! 	 while True:
! 	     result = [i.next() for i in iterables]
! 	     yield tuple(result)
    \end{verbatim}
  \end{funcdesc}
***************
*** 266,270 ****
  Hello
  
! >>> for checknum, amount in loopzip(count(1200), amounts):
  ...     print 'Check %d is for $%.2f' % (checknum, amount)
  ...
--- 238,242 ----
  Hello
  
! >>> for checknum, amount in izip(count(1200), amounts):
  ...     print 'Check %d is for $%.2f' % (checknum, amount)
  ...
***************
*** 273,276 ****
--- 245,249 ----
  Check 1202 is for $823.14
  
+ >>> import operator
  >>> bases = [2, 3, 5, 7]
  >>> powers = [2, 3, 4]
***************
*** 278,281 ****
--- 251,258 ----
  ...     print list(imap(operator.pow, bases, repeat(power)))
  ...
+ [4, 9, 25, 49]
+ [8, 27, 125, 343]
+ [16, 81, 625, 2401]
+ 
  \end{verbatim}
  

Index: test_itertools.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/test_itertools.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** test_itertools.py	27 Jan 2003 12:16:42 -0000	1.6
--- test_itertools.py	27 Jan 2003 23:33:26 -0000	1.7
***************
*** 19,27 ****
          self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
  
!     def test_loopzip(self):
!         ans = [(x,y) for x, y in loopzip('abc',count())]
          self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
!         self.assertEqual(list(loopzip('abc',count())), [['c', 2]] * 3)
!         self.assertRaises(TypeError, loopzip)
  
      def test_repeat(self):
--- 19,26 ----
          self.assertRaises(TypeError, ifilter, isEven, [3], True, 4)
  
!     def test_izip(self):
!         ans = [(x,y) for x, y in izip('abc',count())]
          self.assertEqual(ans, [('a', 0), ('b', 1), ('c', 2)])
!         self.assertRaises(TypeError, izip)
  
      def test_repeat(self):
***************
*** 80,84 ****
          self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
  
! def test_main():
      suite = unittest.TestSuite()
      for testclass in (TestBasicOps,
--- 79,126 ----
          self.assertEqual(list(dropwhile(underten, data)), [20, 2, 4, 6, 8])
  
! libreftest = """ Doctest for examples in the library reference, libitertools.tex
! 
! >>> for i in times(3):
! ...     print "Hello"
! ...
! Hello
! Hello
! Hello
! 
! >>> amounts = [120.15, 764.05, 823.14]
! >>> for checknum, amount in izip(count(1200), amounts):
! ...     print 'Check %d is for $%.2f' % (checknum, amount)
! ...
! Check 1200 is for $120.15
! Check 1201 is for $764.05
! Check 1202 is for $823.14
! 
! >>> import operator
! >>> bases = [2, 3, 5, 7]
! >>> powers = [2, 3, 4]
! >>> for power in powers:
! ...     print list(imap(operator.pow, bases, repeat(power)))
! ...
! [4, 9, 25, 49]
! [8, 27, 125, 343]
! [16, 81, 625, 2401]
! 
! 
! >>> def enumerate(s):
! ...     return izip(count(), s)
! >>> def tabulate(f):
! ...     return imap(f, count())
! >>> def iteritems(d):
! ...     return izip(d.iterkeys(), d.itervalues())
! >>> def nth(s, n):
! ...     return islice(s, n, n+1).next()
! 
! 
! """
! 
! __test__ = {'libreftest' : libreftest}
! 
! def test_main(verbose=None):
!     import test_itertools
      suite = unittest.TestSuite()
      for testclass in (TestBasicOps,
***************
*** 86,90 ****
          suite.addTest(unittest.makeSuite(testclass))
      test_support.run_suite(suite)
  
  if __name__ == "__main__":
!     test_main()
--- 128,133 ----
          suite.addTest(unittest.makeSuite(testclass))
      test_support.run_suite(suite)
+     test_support.run_doctest(test_itertools, verbose)
  
  if __name__ == "__main__":
!     test_main(verbose=True)

Index: todo.txt
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/itertools/todo.txt,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** todo.txt	27 Jan 2003 12:16:42 -0000	1.9
--- todo.txt	27 Jan 2003 23:33:26 -0000	1.10
***************
*** 1,2 ****
--- 1,10 ----
+ Comments from Skip and Jack:
+     func=None in map
+     provide in-line motivating examples
+     ? add default arg to times()
+ 
+ Doctest:
+     make islice() python code perform the same as the C code
+ 
  Add:
     iapply(func)  			?? what did this do in SML
***************
*** 12,16 ****
  Things dropped because they bug me:
     cycle(seqn) requires auxilliary storage (which is surprising
!          behavior for iterators).  This is best left for pure python.  
  
  Things that just bug me:
--- 20,26 ----
  Things dropped because they bug me:
     cycle(seqn) requires auxilliary storage (which is surprising
!          behavior for iterators).  This is best left for pure python.
!    loopzip(s1, s2, ...) returned mutable lists which could be
!          prone to creating hard-to-find errors.
  
  Things that just bug me: