need simple parsing ability

Jeff Epler jepler at unpythonic.net
Fri Jul 16 11:45:52 EDT 2004


import re

# Regex fragments for the item grammar.  They are kept as plain strings so
# they can be concatenated into the two patterns compiled below.
COMMA = ","
OPT_WS = "[ \t]*"                           # optional spaces/tabs after a comma
STEM = "([a-zA-Z_]*)"                       # group 1: letters/underscores, may be empty
NUMBER = "([0-9]+)"
OPT_NUMBER = NUMBER + "?"                   # group 2: optional first number
OPT_SECOND_NUMBER = "(?:-" + NUMBER + ")?"  # group 3: optional "-<number>" range end

# splitter(text) -> list of items, split on a comma plus any following
# spaces/tabs.
splitter = re.compile(COMMA + OPT_WS).split

# Debug aid: show the assembled item pattern when the module is loaded.
# repr() replaces the backtick syntax, which is a SyntaxError on Python 3;
# the printed text is the same.
print(repr(STEM + OPT_NUMBER + OPT_SECOND_NUMBER))

# parser(item) -> match object with groups (stem, first number, second number).
# Every part of the pattern is optional and .match only anchors at the start,
# so the match always succeeds and may cover just a prefix (possibly empty)
# of the item; callers that need validation must check how much was consumed.
parser = re.compile(STEM + OPT_NUMBER + OPT_SECOND_NUMBER).match

def expand(stem, n0, n1):
    """Generate the names described by one parsed item.

    stem -- prefix placed in front of every generated name
    n0   -- first number as a digit string, or a false value if absent
    n1   -- last number as a digit string, or a false value if absent

    With both numbers present, yields stem + i for every integer i from n0
    to n1 inclusive, zero-padded to the width of n0 as written (so
    "09"-"12" gives 09, 10, 11, 12).  With only n0, yields stem + n0 once.
    With neither, yields the bare stem.
    """
    if n1:
        # The width of the first number as typed fixes the zero padding.
        width = len(n0)
        first = int(n0, 10)
        last = int(n1, 10)
        for value in range(first, last + 1):
            yield "%s%0*d" % (stem, width, value)
    elif n0:
        yield "%s%s" % (stem, n0)
    else:
        yield stem

def parse_string(line):
    """Expand a comma-separated list of names and numeric ranges.

    line is split with splitter(); each item is matched with parser() and
    handed to expand(), so an item may be a bare stem ("spam"), a stem with
    a number ("foo7"), or a stem with an inclusive range ("bar_09-12").
    The stem may be empty ("9", "2-4").  Whitespace around an item is
    ignored.

    Yields the expanded names in input order.

    Raises ValueError, when the generator is iterated, for an item that
    contains anything beyond that form or that gives a range end without a
    range start ("foo-9").
    """
    for item in splitter(line):
        # Tolerate stray whitespace (e.g. before a comma, or a trailing
        # newline on the line) but nothing else around the item.
        text = item.strip()
        match = parser(text)
        # The pattern can match a mere prefix of the item, even an empty
        # one, so a successful match is not enough: it has to consume the
        # whole item, otherwise trailing garbage would be silently dropped.
        if match is None or match.end() != len(text):
            raise ValueError("Invalid item: %r" % item)
        stem, n0, n1 = match.groups()
        # "stem-9": a range end with no start cannot be expanded.
        if n1 and not n0:
            raise ValueError("Invalid item: %r" % item)
        for name in expand(stem, n0, n1):
            yield name
 
def test():
    """Print a sample input line followed by the list parse_string() yields for it."""
    s = "9,foo7-9,bar_09-12,2-4,spam"
    # One pre-formatted argument keeps the output identical whether print is
    # the Python 2 statement or the Python 3 function.
    print("%s %s" % (s, list(parse_string(s))))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
URL: <http://mail.python.org/pipermail/python-list/attachments/20040716/f25364a5/attachment.sig>


More information about the Python-list mailing list