Splitting a string into substrings of equal size

Jan Kaliszewski zuo at chopin.edu.pl
Fri Aug 14 22:17:35 EDT 2009


15-08-2009 candide <candide at free.invalid> wrote:

> Suppose you need to split a string into substrings of a given size
> (except possibly the last substring). I make the hypothesis that the
> first slice is at the end of the string.
> A typical example is provided by formatting a decimal string with a
> thousands separator.

I'd use iterators, especially for longer strings...


import itertools

def separate(text, grouplen=3, sep=','):
    """Split *text* into groups of *grouplen* characters joined by *sep*.

    Groups are counted from the start of the string, so only the last
    group may be shorter than *grouplen*:

        separate('12345678') -> '123,456,78'
    """
    # izip_longest was renamed zip_longest in Python 3; resolve it locally
    # so the function runs on both interpreter lines.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2
        from itertools import izip_longest as zip_longest

    # The *same* iterator object repeated grouplen times: every tuple built
    # by zip_longest therefore consumes grouplen consecutive characters.
    repeated_iterator = [iter(text)] * grouplen
    # fillvalue='' pads the final, possibly incomplete, group so that
    # joining it simply yields the shorter remainder.
    groups = zip_longest(*repeated_iterator, fillvalue='')
    return sep.join(''.join(group) for group in groups)

def back_separate(text, grouplen=3, sep=','):
    """Split *text* into groups of *grouplen* characters, counted from the end.

    Only the first group may be shorter than *grouplen*, which is the
    layout used for thousands separators:

        back_separate('12345678') -> '12,345,678'
    """
    # izip_longest was renamed zip_longest in Python 3; resolve it locally
    # so the function runs on both interpreter lines.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2
        from itertools import izip_longest as zip_longest

    # One shared reverse iterator repeated grouplen times: each tuple takes
    # grouplen consecutive characters walking backwards through the text.
    repeated_iterator = [reversed(text)] * grouplen
    groups = zip_longest(*repeated_iterator, fillvalue='')
    # Characters inside each group arrive back-to-front, so flip them.
    strings = [''.join(reversed(group)) for group in groups]
    # The groups themselves are also in back-to-front order; a list is
    # needed here because reversed() requires a sequence.
    return sep.join(reversed(strings))

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(separate('12345678'))       # 123,456,78
print(back_separate('12345678'))  # 12,345,678

# alternate implementation
# (without "materializing" 'strings' as a list in back_separate):
def separate(text, grouplen=3, sep=','):
    """Split *text* into groups of *grouplen* characters joined by *sep*.

    Groups are counted from the start of the string, so only the last
    group may be shorter than *grouplen*:

        separate('12345678') -> '123,456,78'
        separate('123456')   -> '123,456'

    The groups are produced lazily; no intermediate list is built.
    """
    textlen = len(text)
    # Index where the last complete group ends.
    end = textlen - (textlen % grouplen)
    # islice walks the first `end` characters without copying the string.
    chars = itertools.islice(text, 0, end)
    # Two-argument iter(): keep calling the lambda, which pulls the next
    # grouplen characters, until it returns the sentinel '' (exhausted).
    strings = iter(lambda: ''.join(itertools.islice(chars, grouplen)), '')
    tail = text[end:]
    # Only append the remainder when there is one; an empty tail would
    # otherwise leave a dangling separator at the end of the result.
    return sep.join(itertools.chain(strings, (tail,) if tail else ()))

def back_separate(text, grouplen=3, sep=','):
    """Split *text* into groups of *grouplen* characters, counted from the end.

    Only the first group may be shorter than *grouplen*, which is the
    layout used for thousands separators:

        back_separate('12345678') -> '12,345,678'
        back_separate('123456')   -> '123,456'

    The groups are produced lazily; no intermediate list is built.
    """
    # Length of the short leading group (0 when the text divides evenly).
    beg = len(text) % grouplen
    # islice walks the characters after the head without copying the string.
    chars = itertools.islice(text, beg, None)
    # Two-argument iter(): keep calling the lambda, which pulls the next
    # grouplen characters, until it returns the sentinel '' (exhausted).
    strings = iter(lambda: ''.join(itertools.islice(chars, grouplen)), '')
    head = text[:beg]
    # Only prepend the head when there is one; an empty head would
    # otherwise leave a dangling separator at the start of the result.
    return sep.join(itertools.chain((head,) if head else (), strings))

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(separate('12345678'))       # 123,456,78
print(back_separate('12345678'))  # 12,345,678


http://docs.python.org/library/itertools.html#recipes
was the inspiration for me (especially grouper).

Cheers,
*j
-- 
Jan Kaliszewski (zuo) <zuo at chopin.edu.pl>



More information about the Python-list mailing list