[Python-checkins] bpo-30346: An iterator produced by the itertools.groupby() iterator (#1569)

Serhiy Storchaka webhook-mailer at python.org
Sun Sep 24 06:36:14 EDT 2017


https://github.com/python/cpython/commit/c247caf33f6e6000d828db4762d1cb12edf3cd57
commit: c247caf33f6e6000d828db4762d1cb12edf3cd57
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-09-24T13:36:11+03:00
summary:

bpo-30346: An iterator produced by the itertools.groupby() iterator (#1569)

now becomes exhausted after advancing the groupby iterator.

files:
A Misc/NEWS.d/next/Library/2017-09-24-13-08-46.bpo-30346.Csse77.rst
M Doc/library/itertools.rst
M Lib/test/test_itertools.py
M Modules/itertoolsmodule.c

diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index c989e464200..530c29dec4a 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -401,13 +401,14 @@ loops that truncate the stream.
           def __iter__(self):
               return self
           def __next__(self):
+              self.id = object()
               while self.currkey == self.tgtkey:
                   self.currvalue = next(self.it)    # Exit on StopIteration
                   self.currkey = self.keyfunc(self.currvalue)
               self.tgtkey = self.currkey
-              return (self.currkey, self._grouper(self.tgtkey))
-          def _grouper(self, tgtkey):
-              while self.currkey == tgtkey:
+              return (self.currkey, self._grouper(self.tgtkey, self.id))
+          def _grouper(self, tgtkey, id):
+              while self.id is id and self.currkey == tgtkey:
                   yield self.currvalue
                   try:
                       self.currvalue = next(self.it)
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 50cf1488ec6..8353e68977d 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -751,6 +751,26 @@ def test_groupby(self):
         self.assertEqual(set(keys), expectedkeys)
         self.assertEqual(len(keys), len(expectedkeys))
 
+        # Check case where inner iterator is used after advancing the groupby
+        # iterator
+        s = list(zip('AABBBAAAA', range(9)))
+        it = groupby(s, testR)
+        _, g1 = next(it)
+        _, g2 = next(it)
+        _, g3 = next(it)
+        self.assertEqual(list(g1), [])
+        self.assertEqual(list(g2), [])
+        self.assertEqual(next(g3), ('A', 5))
+        list(it)  # exhaust the groupby iterator
+        self.assertEqual(list(g3), [])
+
+        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
+            it = groupby(s, testR)
+            _, g = next(it)
+            next(it)
+            next(it)
+            self.assertEqual(list(pickle.loads(pickle.dumps(g, proto))), [])
+
         # Exercise pipes and filters style
         s = 'abracadabra'
         # sort s | uniq
diff --git a/Misc/NEWS.d/next/Library/2017-09-24-13-08-46.bpo-30346.Csse77.rst b/Misc/NEWS.d/next/Library/2017-09-24-13-08-46.bpo-30346.Csse77.rst
new file mode 100644
index 00000000000..81ad0534fc1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-24-13-08-46.bpo-30346.Csse77.rst
@@ -0,0 +1,2 @@
+An iterator produced by itertools.groupby() iterator now becomes exhausted
+after advancing the groupby iterator.
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index 48e6c35db4f..2ac5ab24ec8 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -17,6 +17,7 @@ typedef struct {
     PyObject *tgtkey;
     PyObject *currkey;
     PyObject *currvalue;
+    const void *currgrouper;  /* borrowed reference */
 } groupbyobject;
 
 static PyTypeObject groupby_type;
@@ -77,6 +78,7 @@ groupby_next(groupbyobject *gbo)
 {
     PyObject *newvalue, *newkey, *r, *grouper;
 
+    gbo->currgrouper = NULL;
     /* skip to next iteration group */
     for (;;) {
         if (gbo->currkey == NULL)
@@ -255,6 +257,7 @@ _grouper_create(groupbyobject *parent, PyObject *tgtkey)
     Py_INCREF(parent);
     igo->tgtkey = tgtkey;
     Py_INCREF(tgtkey);
+    parent->currgrouper = igo;  /* borrowed reference */
 
     PyObject_GC_Track(igo);
     return (PyObject *)igo;
@@ -284,6 +287,8 @@ _grouper_next(_grouperobject *igo)
     PyObject *newvalue, *newkey, *r;
     int rcmp;
 
+    if (gbo->currgrouper != igo)
+        return NULL;
     if (gbo->currvalue == NULL) {
         newvalue = PyIter_Next(gbo->it);
         if (newvalue == NULL)
@@ -321,6 +326,9 @@ _grouper_next(_grouperobject *igo)
 static PyObject *
 _grouper_reduce(_grouperobject *lz)
 {
+    if (((groupbyobject *)lz->parent)->currgrouper != lz) {
+        return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
+    }
     return Py_BuildValue("O(OO)", Py_TYPE(lz), lz->parent, lz->tgtkey);
 }
 



More information about the Python-checkins mailing list