Parse each line by character location

bearophileHUGS at lycos.com bearophileHUGS at lycos.com
Tue Nov 4 15:35:41 EST 2008


George Sakkis:
> Here's a general solution for fixed size records:
> >>> def slicer(*sizes):
> ...     slices = len(sizes) * [None]
> ...     start = 0
> ...     for i,size in enumerate(sizes):
> ...         stop = start+size
> ...         slices[i] = slice(start,stop)
> ...         start = stop
> ...     return lambda string: [string[s].strip() for s in slices]
> ...
> >>> order_slicer = slicer(10,1,10,4)
> >>> order_slicer('______2345H0000300000_NC_'.replace('_',' '))
> ['2345', 'H', '0000300000', 'NC']


Nice. Here's a slightly modified version:

from collections import namedtuple

def slicer(names, sizes):
    """Build a parser for fixed-width records that returns named fields.

    The widths in ``sizes`` are laid out back to back starting at
    character 0. Each field is cut out of the record with a slice and
    stripped of surrounding whitespace. Because slicing never raises on
    out-of-range bounds, a record shorter than the total width yields
    empty strings for the missing trailing fields.

    Args:
        names: Field names, in any form accepted by
            ``collections.namedtuple``.
        sizes: Iterable of field widths in characters, one per name.

    Returns:
        A function taking one record string and returning a ``Sliced``
        named tuple holding one stripped string per field.

    Raises:
        ValueError: If the number of names differs from the number of
            sizes.

    >>> sl = slicer(["code", "p1", "progressive", "label"],
    ...             (10, 1, 10, 4))
    >>> txt = "______2345H0000300000_NC_"
    >>> print(sl(txt.replace('_', ' ')))
    Sliced(code='2345', p1='H', progressive='0000300000', label='NC')
    """
    # Turn the widths into consecutive, non-overlapping slices.
    slices = []
    start = 0
    for size in sizes:
        stop = start + size
        slices.append(slice(start, stop))
        start = stop

    Sliced = namedtuple("Sliced", names)

    # Fail here, at construction time, rather than on the first parsed
    # record: a count mismatch would otherwise only surface as a TypeError
    # from the namedtuple constructor inside the returned function.
    if len(Sliced._fields) != len(slices):
        raise ValueError(
            "got %d field names but %d field sizes"
            % (len(Sliced._fields), len(slices))
        )

    return lambda txt: Sliced(*(txt[s].strip() for s in slices))

if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this module.
    import doctest
    doctest.testmod()
    # A parenthesized single-argument print produces the same output on
    # Python 2 (print statement) and Python 3 (print function).
    print("Doctests done.\n")

Bye,
bearophile



More information about the Python-list mailing list