Inserting while itterating

Thu Jan 15 19:22:14 EST 2004

Skip Montanaro wrote:

>>>>>> "Mark" == Mark McEahern <mark at mceahern.com> writes:
> 
>     Mark> On Wed, 2004-01-14 at 02:43, Thomas Guettler wrote:
>     >> Hi,
>     >> 
>     >> Simple excerise:
>     >> 
>     >> You have a sorted list of integers:
>     ...
>     >> and you should fill all gaps:
>     ...
>     >> How would you code this?
>     >> 
>     >> Constrain: The original list should be changed, don't create a
>     >> copy.
> 
>     Mark> In the spirt of unit testing...
> 
>     Mark> #!/usr/bin/env python
>     ...
> 
> Here's a version which is much faster if you have large gaps and appears
> to be only marginally slower if you have small gaps.
> 
> Skip
> 
> #!/usr/bin/env python
> 
> #!/usr/bin/env python
> 
> def fillGaps1(seq):
      if  len(seq) == seq[-1]:
          raise Exception("nothing to do")
>     expectedLength = seq[-1] - seq[0] + 1
>     i = 1
>     while i < expectedLength:
>         if seq[i] - seq[i-1] > 1:
>             seq.insert(i, seq[i-1] + 1)
>         i += 1
> 
> def fillGaps(seq):
>     expectedLength = seq[-1] - seq[0] + 1
>     i = 1
>     while i < expectedLength:
>         if seq[i] - seq[i-1] > 1:
>             gap = seq[i-1] - seq[i]
>             fill = range(seq[i-1]+1, seq[i])
>             seq[i:i] = fill
>             i += len(fill)
>         i += 1
> 
> if __name__ == "__main__":
>     import timeit
> 
>     print "timing with one large gap:"
>     t = timeit.Timer(setup='from fillgaps import fillGaps1 as fillGaps',
>                      stmt='fillGaps([1, 5000])')
>     print "old fillgaps:", t.timeit(number=100)
>     t = timeit.Timer(setup='from fillgaps import fillGaps',
>                      stmt='fillGaps([1, 5000])')
>     print "new fillgaps:", t.timeit(number=100)
> 
>     print "timing with many small gaps:"
>     t = timeit.Timer(setup='from fillgaps import fillGaps1 as
>     fillGaps;l=range(1,5001,2)',
>                      stmt='fillGaps(l)')

I think the list should be initialized in the statement instead of the
setup. Otherwise subsequent passes will measure for range(1, 5000).

>     print "old fillgaps:", t.timeit(number=100)
>     t = timeit.Timer(setup='from fillgaps import
>     fillGaps;l=range(1,5001,2)',
>                      stmt='fillGaps(l)')
>     print "new fillgaps:", t.timeit(number=100)
> 
>     import unittest
>     class test(unittest.TestCase):
>         def test(self):
>             for fg in (fillGaps1, fillGaps):
>                 l = [1, 2, 4, 7, 8, 12]
>                 fg(l)
>                 self.assertEquals(l, range(1, 13))
> 
>                 l = [1, 5000]
>                 fg(l)
>                 self.assertEquals(l, range(1, 5001))
> 
>     unittest.main()

Here's another fillgap version:

def fillGapsPeter(seq):
    # inspired by anton muhin
    lo = seq[0]
    hi = seq[-1]
    seq[1:1] = range(lo+1, hi+1)
    while len(seq) > hi:
        i = seq.pop()
        seq[i-lo] = i

def fillGapsMark(seq):
    expectedLength = seq[-1] - seq[0] + 1
    i = 1
    while i < expectedLength:
        if seq[i] - seq[i-1] > 1:
            seq.insert(i, seq[i-1] + 1)
        i += 1

def fillGapsSkip(seq):
    expectedLength = seq[-1] - seq[0] + 1
    i = 1
    while i < expectedLength:
        if seq[i] - seq[i-1] > 1:
            gap = seq[i-1] - seq[i]
            fill = range(seq[i-1]+1, seq[i])
            seq[i:i] = fill
            i += len(fill)
        i += 1

if __name__ == "__main__":
    import timeit, fillgaps, sys
    fnames = [fname for fname in dir(fillgaps) if callable(getattr(fillgaps,
fname))]
    fnames.sort()
    for header, lst in [
        ("original test case", [1, 2, 4, 7, 8, 12]),
        ("timing with one large gap:", [1, 5000]),
        ("timing with many small gaps:", range(1, 5001, 2))]:
        print header
        for fn in fnames:
            t = timeit.Timer(setup='from fillgaps import %s as fillGaps' %
fn,
                        stmt='fillGaps(%s)' % (lst,))
            print "\t%s:" % fn, t.timeit(number=100)
        print
    import unittest
    class test(unittest.TestCase):
        def test_all(self):
            for fg in [getattr(fillgaps, fn) for fn in fnames]:
                l = [1, 2, 4, 7, 8, 12]
                fg(l)
                self.assertEquals(l, range(1, 13))

                l = [1, 5000]
                fg(l)
                self.assertEquals(l, range(1, 5001))
        def test_mine(self):
            """ Hard to prove I'm not cheating,
                hope I got it right...
            """
            class myint(int):
                def __repr__(self):
                    return "my" + int.__repr__(self)

            original = map(myint, [1, 2, 4, 7, 8, 12])
            l = list(original)
            fillGapsPeter(l)
            self.assertEquals(l, range(1, 13))
            for i in original:
                self.assert_(i is l[i-1])
                self.assert_(repr(i).startswith("my"))
    unittest.main()

My findings:

original test case
        fillGapsMark: 0.00151419639587
        fillGapsPeter: 0.00127387046814
        fillGapsSkip: 0.00162696838379

timing with one large gap:
        fillGapsMark: 0.872708797455
        fillGapsPeter: 0.0312719345093
        fillGapsSkip: 0.0336079597473

timing with many small gaps:
        fillGapsMark: 0.973310947418
        fillGapsPeter: 0.412738800049
        fillGapsSkip: 1.47315406799

Peter