Refactor a buffered class...

George Sakkis george.sakkis at gmail.com
Wed Sep 6 23:20:43 EDT 2006


Michael Spencer wrote:

> Here's a small update to the generator that allows optional handling of the head
> and the tail:
>
> def chunker(s, chunk_size=3, sentry=".", keep_first = False, keep_last = False):
>      buffer=[]
>      sentry_count = 0
>
>      for item in s:
>          buffer.append(item)
>          if item == sentry:
>              sentry_count += 1
>              if sentry_count < chunk_size:
>                  if keep_first:
>                      yield buffer
>              else:
>                  yield buffer
>                  del buffer[:buffer.index(sentry)+1]
>
>      if keep_last:
>          while buffer:
>              yield buffer
>              del buffer[:buffer.index(sentry)+1]


And here's a (probably) more efficient version, using a deque as a
buffer:

from itertools import islice
from collections import deque

def chunker(seq, sentry='.', chunk_size=3, keep_first=False,
            keep_last=False):
    """Yield sliding windows of `chunk_size` consecutive segments of `seq`.

    `seq` is split into segments on every `sentry` item (via `itersplit`).
    Each yielded value is a new list holding one string per segment in the
    current window, built by joining the segment's items with single
    spaces, so the items of `seq` must be strings.

    keep_first -- also yield the partial windows (1 .. chunk_size-1
                  segments) that build up before the first full window.
    keep_last  -- also yield the shrinking windows that remain once the
                  input is exhausted.
    """
    def render(window):
        # One space-joined string per segment currently in the window.
        return [' '.join(segment) for segment in window]

    segments = itersplit(seq, sentry)
    window = deque()

    # Warm-up: load the first chunk_size-1 segments so that the next
    # segment read completes a full window.
    warmup = islice(segments, chunk_size - 1)
    if keep_first:
        for segment in warmup:
            window.append(segment)
            yield render(window)
    else:
        window.extend(warmup)

    # Steady state: each new segment completes a window; emit it, then
    # slide forward by dropping the oldest segment.
    for segment in segments:
        window.append(segment)
        yield render(window)
        window.popleft()

    # Drain: emit whatever is left, shortest-last.
    if keep_last:
        for _ in range(len(window)):
            yield render(window)
            window.popleft()


def itersplit(seq, sentry='.'):
    """Split the iterable `seq` into lists, cutting at each `sentry` item.

    Every occurrence of `sentry` ends the current segment, which is yielded
    (possibly empty) without the sentry itself.  Items left over after the
    last sentry are yielded as a final segment only if there are any.
    """
    segment = []
    for item in seq:
        if item != sentry:
            segment.append(item)
            continue
        # Hit a separator: hand out what was collected and start afresh.
        yield segment
        segment = []
    # Trailing items that were never terminated by a sentry.
    if segment:
        yield segment


# Demo: print every window produced for a sample sentence, including the
# partial windows at the start (keep_first) and at the end (keep_last).
# The sample text is written as two adjacent literals so that the string
# stays within the line width without an unterminated-literal error.
s = (" this .  is a . test to . check if it . works . well . it looks . "
     "like .")
for p in chunker(s.split(), keep_last=True, keep_first=True):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(p)


George




More information about the Python-list mailing list