itertools.izip brokenness

Bengt Richter bokr at oz.net
Thu Jan 5 20:25:48 EST 2006


On Thu, 05 Jan 2006 07:42:25 GMT, bokr at oz.net (Bengt Richter) wrote:

>On 4 Jan 2006 15:20:43 -0800, "Raymond Hettinger" <python at rcn.com> wrote:
>
[ ... 5 options enumerated ... ]
>>
>>
>6. Could modify izip so that one could write
>
>    from itertools import izip
>    zipit = izip(*seqs)	# bind iterator object to preserve access to its state later
>    for tup in zipit:
>        # do something with tup as now produced
>    for tup in zipit.rest(sentinel):
>        # tup starts with the tuple that would have been returned if all sequences
>        # had been sampled and sentinel substituted where StopIteration happened.
>        # continuing until but not including (sentinel,)*len(seqs)
>
>This would seem backwards compatible, and also potentially allow you to use the rest mode
>from the start, as in
>
>    for tup in izip(*seqs).rest(sentinel):
>        # process tup and notice sentinel for yourself
>
Demo-of-concept hack: only tested as you see below

----< izip2.py >-----------------------------------------------------
class izip2(object):
    """
    Lock-step iterator over several sequences with a resumable "rest" mode.

    Works like itertools.izip: iterating yields tuples holding one item
    from each sequence and stops as soon as any sequence is exhausted.
    If a reference (e.g. it) to the stopped iterator is preserved,
    it.rest(sentinel) returns an iterator that will continue to return
    tuples with sentinel substituted for items from exhausted sequences,
    until all sequences are exhausted.  Items that had already been
    fetched in the round that hit the first exhausted sequence are not
    lost; they appear in the first tuple produced after rest().

    rest() may also be called before iterating, in which case the
    substitution applies from the very first tuple.

    Exhaustion is tracked with a private marker object rather than by
    comparing produced values with the sentinel, so data that happens to
    equal the sentinel never ends the iteration early.

    Instance state:
      iters    -- list of iterators, one per input sequence
      restmode -- one of the state constants below
      sentinel -- fill value, set by rest()
      tup, i   -- partial tuple and index of the iterator that stopped,
                  saved when the normal (izip-like) phase ends
    """
    # Iteration states:
    #   FIRST       normal izip-like phase
    #   FIRST_STOP  normal phase ended, rest() not (yet) called
    #   FIRST_REST  rest() called after FIRST_STOP; saved tuple is pending
    #   REST        producing sentinel-filled tuples
    #   REST_STOP   every sequence is exhausted
    FIRST, FIRST_STOP, FIRST_REST, REST, REST_STOP = range(5)

    # Unique placeholder for "this iterator had nothing left".  It is
    # swapped for the user's sentinel only when a tuple is handed out.
    _EXHAUSTED = object()

    def __init__(self, *seqs):
        """Create the zipper over the iterables given in seqs."""
        # A real list is required: it is truth-tested, sliced and
        # traversed more than once.
        self.iters = [iter(seq) for seq in seqs]
        self.restmode = self.FIRST

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """
        Return the next tuple for the current mode.

        Raises StopIteration when the current phase is finished (or when
        no sequences were given) and RuntimeError if restmode holds an
        unknown value.
        """
        if not self.iters:
            raise StopIteration
        mode = self.restmode
        if mode == self.FIRST:
            items = []
            for i, it in enumerate(self.iters):
                try:
                    items.append(next(it))
                except StopIteration:
                    # Remember what was already consumed in this round
                    # and which iterator stopped, so rest() can finish
                    # the tuple instead of dropping those items.
                    self.restmode = self.FIRST_STOP  # stopped, not rest-restarted
                    self.tup = items
                    self.i = i
                    raise
            return tuple(items)
        if mode in (self.FIRST_STOP, self.REST_STOP):
            # normal part, or rest part, exhausted
            raise StopIteration
        if mode in (self.FIRST_REST, self.REST):
            marker = self._EXHAUSTED
            if mode == self.FIRST_REST:
                items = self.tup  # saved and completed by rest()
                self.restmode = self.REST
            else:
                items = [next(it, marker) for it in self.iters]
            exhausted = [item is marker for item in items]
            if all(exhausted):
                self.restmode = self.REST_STOP
                raise StopIteration
            sentinel = self.sentinel
            return tuple([sentinel if gone else item
                          for gone, item in zip(exhausted, items)])
        raise RuntimeError('Bad restmode: %r' % self.restmode)

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def rest(self, sentinel=''):
        """
        Switch to rest mode and return self.

        sentinel is the value substituted for items of exhausted
        sequences.  Calling rest() again once rest mode is active only
        updates the sentinel; it never rewinds or duplicates output.
        """
        self.sentinel = sentinel
        if self.restmode == self.FIRST:  # prior to any sequence end
            self.restmode = self.REST
        elif self.restmode == self.FIRST_STOP:
            marker = self._EXHAUSTED
            pending = self.tup
            # Iterator self.i is the one that raised StopIteration.
            pending.append(marker)
            # Sample the iterators that had not been reached in that round.
            for it in self.iters[self.i + 1:]:
                pending.append(next(it, marker))
            self.restmode = self.FIRST_REST
        return self

def test():
    """
    Self-check for izip2.

    Covers: no sequences at all, first sequence longer, second sequence
    longer, equal lengths (nothing left for rest()), and three sequences
    of different lengths with None as the sentinel.  Prints
    'test passed' when every assertion holds.
    """
    # no sequences: nothing to yield in either mode
    assert list(izip2()) == []
    assert list(izip2().rest('')) == []
    # first sequence is longer
    it = izip2('ab', '1')
    assert list(it) == [('a', '1')]
    assert list(it.rest()) == [('b', '')]
    # second sequence is longer
    it = izip2('a', '12')
    assert list(it) == [('a', '1')]
    assert list(it.rest()) == [('', '2')]
    # equal lengths: rest() has nothing to add
    it = izip2('ab', '12')
    assert list(it) == [('a', '1'), ('b', '2')]
    assert list(it.rest()) == []
    # three sequences, explicit sentinel
    it = izip2(range(3), (11, 22), 'abcd')
    assert list(it) == [(0, 11, 'a'), (1, 22, 'b')]
    assert list(it.rest(None)) == [(2, None, 'c'), (None, None, 'd')]
    # Parenthesised single argument prints the same on Python 2 and 3.
    print('test passed')

if __name__ == '__main__':
    # Run the self-check only when executed as a script, not on import.
    test()
---------------------------------------------------------------------

Using this, Antoon's example becomes:

 >>> from izip2 import izip2
 >>> it = izip2([3,5,8], [11,22])
 >>> for t in it: print t
 ...
 (3, 11)
 (5, 22)
 >>> for t in it.rest('Bye'): print t
 ...
 (8, 'Bye')

Want to make an efficient C version, Raymond? ;-)

Regards,
Bengt Richter



More information about the Python-list mailing list