how to parse structured text file?

Paul McGuire ptmcg at austin.rr.com
Tue Jan 31 20:09:10 EST 2006


A problem fit for pyparsing!  Download pyparsing at
http://pyparsing.sourceforge.net.

Assuming every entry always has these fields, in this order, the program
below will parse them out.  If not, you'll need to adjust the pyparsing
definitions to match.

-- Paul


# Sample input for the parser below.  Each field is a "label:" line followed
# by its value on the next line(s): colors are three integers, Num.Pts is a
# count line followed by "x y" coordinate lines (two of them here), and the
# event fields are single identifiers.
data = """type:
4
bgrColor:
255 255 255
fgrColor:
0 0 0
objId:
16
Num.Pts:
2
177 104
350 134
objStartId:
14
objEndId:
15
eventName:
trigger
eventCond:
guard
eventAction:
do_action
"""

from pyparsing import *

# one Literal per field label, created in the order the labels appear in an
# entry; the names on the left pair up positionally with the strings below
(type_, bgrColor, fgrColor, objId, numPts,
 objStartId, objEndId, eventName, eventCond, eventAction) = map(Literal, (
    "type",
    "bgrColor",
    "fgrColor",
    "objId",
    "Num.Pts",
    "objStartId",
    "objEndId",
    "eventName",
    "eventCond",
    "eventAction",
))

# parse action that replaces a matched run of digits with its int value
def _to_int(s, l, toks):
    return int(toks[0])

# integer token; matches are converted to Python ints while parsing
intvalue = Word(nums).setParseAction(_to_int)

# alphabetic identifier: starts with a letter, continues with letters,
# digits or underscores
alphavalue = Word(alphas, alphanums + "_")

# 2D coordinate: two integers grouped together, retrievable as X and Y
_x = intvalue.setResultsName("X")
_y = intvalue.setResultsName("Y")
coordvalue = Group(_x + _y)

# RGB color value: three integers grouped together, retrievable as R, G, B
_r = intvalue.setResultsName("R")
_g = intvalue.setResultsName("G")
_b = intvalue.setResultsName("B")
colorvalue = Group(_r + _g + _b)

# compose an entry definition, using above-defined expressions, with
# results names for fields
#
# NOTE: the type value is matched as the literal string "4", so this
# expression only recognizes entries whose type is 4.
entry = ( type_ + ":" + "4" +
          bgrColor + ":" + colorvalue.setResultsName("bgrColor") +
          fgrColor + ":" + colorvalue.setResultsName("fgrColor") +
          objId + ":" + intvalue.setResultsName("objId") +
          # Num.Pts is a point count followed by one or more coordinates,
          # grouped under "pts"; the count is recorded as "numpts" but is
          # not used to limit how many coordinates are consumed
          numPts + ":" + Group( intvalue.setResultsName("numpts") +
                       OneOrMore( coordvalue ).setResultsName("coords")
                              ).setResultsName("pts") +
          objStartId + ":" + intvalue.setResultsName("objStartId") +
          objEndId + ":" + intvalue.setResultsName("objEndId") +
          eventName + ":" + alphavalue.setResultsName("eventName") +
          eventCond + ":" + alphavalue.setResultsName("eventCond") +
          eventAction + ":" + alphavalue.setResultsName("eventAction")
)

# scan through input data, and retrieve data fields as desired
# (each match found by scanString is unpacked as tokens, start, end; only
# the tokens are used here)
for entryData,start,end in entry.scanString(data):
    # the named results serve as the mapping for %(name) formatting; the
    # format string is split into two adjacent literals only to keep the
    # line short -- they are joined before the % is applied
    print("(%(objStartId)d,%(eventName)s,%(eventCond)s,"
          "%(eventAction)s,%(objEndId)d)" % entryData)
    # individual fields are also available as attributes
    print(entryData.objId)
    print(entryData.bgrColor)
    print(entryData.fgrColor)
    # the coordinates, first by field name and then by position
    print([ (pt.X,pt.Y) for pt in entryData.pts.coords ])
    print([ tuple(pt) for pt in entryData.pts.coords ])


Prints:
(14,trigger,guard,do_action,15)
16
[255, 255, 255]
[0, 0, 0]
[(177, 104), (350, 134)]
[(177, 104), (350, 134)]




More information about the Python-list mailing list