Capturing repeating group matches in regular expressions

Wed Aug 11 14:55:34 EDT 2004

The pyparsing version below is a bit wordy, but the ability to easily
structure and retrieve your returned tokens may sway you.

Download pyparsing at http://pyparsing.sourceforge.net

-- Paul

from pyparsing import Word,OneOrMore

# define parse grammar
lowers = "abcdefghijklmnopqrstuvwxyz"
endsWithW = Word(lowers,"W",exact=2)
endsWithX = Word(lowers,"X",exact=2)
endsWithY = Word(lowers,"Y",exact=2)

patt = endsWithW.setResultsName("W") + \
       OneOrMore( endsWithX ).setResultsName("X") + \
       endsWithY.setResultsName("Y")

# extract tokens from input string
tokens = patt.parseString("aWbXcXdXeXfY")

# tokens can be accessed as a list
print "tokens:",tokens

# tokens can be coerced to be a true list
print "tokens.asList():",tokens.asList()

# tokens can be a dictionary, if results names specified
print "tokens.keys():",tokens.keys()
print "tokens['W']:",tokens['W']
print "tokens['X']:",tokens['X']
print "tokens['Y']:",tokens['Y']

# if results names are valid attribute names, can even look like attribute
print "tokens.W:",tokens.W
print "tokens.X:",tokens.X
print "tokens.Y:",tokens.Y

Gives:

tokens: ['aW', 'bX', 'cX', 'dX', 'eX', 'fY']
tokens.asList(): ['aW', 'bX', 'cX', 'dX', 'eX', 'fY']
tokens.keys(): ['Y', 'X', 'W']
tokens['W']: aW
tokens['X']: ['bX', 'cX', 'dX', 'eX']
tokens['Y']: fY
tokens.W: aW
tokens.X: ['bX', 'cX', 'dX', 'eX']
tokens.Y: fY