log parser design question

Paul McGuire ptmcg at austin.rr.com
Mon Jan 29 00:20:47 EST 2007


On Jan 27, 10:43 pm, avidfan <n... at nowhere.com> wrote:
> I need to parse a log file using python and I need some advice/wisdom
> on the best way to go about it:
>
> The log file entries will consist of something like this:
>
> ID=8688 IID=98889998 execute begin - 01.21.2007 status enabled
>         locked working.lock
>         status running
>         status complete
>
> ID=9009 IID=87234785 execute wait - 01.21.2007 status wait
>         waiting to lock
>         status wait
>         waiting on ID=8688
>
> and so on...
>
For the parsing of this data, here is a pyparsing approach.  Once 
parsed, the pyparsing ParseResults data structures can be massaged into 
a queryable list.  See the examples at the end for accessing the 
individual parsed fields.

-- Paul

# Sample log text used as parser input: two entries, each made of a header
# line (ID, IID, "execute ..." label, "-", date, status) followed by
# indented qualifier lines, with blank lines between entries.
data = """
ID=8688 IID=98889998 execute begin - 01.21.2007 status enabled
        locked working.lock
        status running
        status complete


ID=9009 IID=87234785 execute wait - 01.21.2007 status wait
        waiting to lock
        status wait
        waiting on ID=8688

"""
from pyparsing import *

# --- Basic tokens -----------------------------------------------------------
# A run of digits; reused (with different results names) for ID and IID.
integer = Word(nums)
# "ID=<digits>" / "IID=<digits>"; the numeric part is retrievable by name.
idref = "ID=" + integer.setResultsName("id")
iidref = "IID=" + integer.setResultsName("iid")
# Date written as two digits, two digits, four digits, separated by dots.
date = Regex(r"\d\d\.\d\d\.\d{4}")

# --- Header-line pieces -----------------------------------------------------
# Each Group keeps its matched tokens together as one nested sub-list.
logLabel = Group("execute" + oneOf("begin wait"))
logStatus = Group("status" + oneOf("enabled wait"))

# --- Qualifier lines that may follow a header -------------------------------
lockQual = Group("locked" + Word(alphanums + "."))
waitingOnQual = Group("waiting on" + idref)
statusQual = Group("status" + oneOf("running complete wait"))
waitingToLockQual = Group(Literal("waiting to lock"))

# Any one qualifier.  Parenthesized so the expression can span lines safely;
# the original was split after a trailing "|" with no continuation, which is
# a syntax error.
statusQualifier = (
    statusQual
    | waitingOnQual
    | waitingToLockQual
    | lockQual
)

# A complete log entry: header fields followed by zero or more qualifiers.
# Named results: "id", "iid", "logtype", "status", "quals".
logEntry = (
    idref
    + iidref
    + logLabel.setResultsName("logtype")
    + "-"
    + date
    + logStatus.setResultsName("status")
    + ZeroOrMore(statusQualifier).setResultsName("quals")
)

# Scan the sample text for every log entry and show, for each match: the raw
# token list, the dump() listing of named results, and then the individual
# named fields.  Single-argument print(...) calls are used so the script runs
# unchanged on both Python 2 and Python 3 (a print statement is a syntax
# error on Python 3).
for tokens in logEntry.searchString(data):
    print(tokens)
    print(tokens.dump())
    print(tokens.id)
    print(tokens.iid)
    print(tokens.status)
    print(tokens.quals)
    # Blank separator line between entries; print("") emits just a newline
    # on both Python 2 and Python 3.
    print("")

prints:

['ID=', '8688', 'IID=', '98889998', ['execute', 'begin'], '-', 
'01.21.2007', ['status', 'enabled'], ['locked', 'working.lock'], 
['status', 'running'], ['status', 'complete']]
['ID=', '8688', 'IID=', '98889998', ['execute', 'begin'], '-', 
'01.21.2007', ['status', 'enabled'], ['locked', 'working.lock'], 
['status', 'running'], ['status', 'complete']]
- id: 8688
- iid: 98889998
- logtype: ['execute', 'begin']
- quals: [['locked', 'working.lock'], ['status', 'running'], 
['status', 'complete']]
- status: ['status', 'enabled']
8688
98889998
['status', 'enabled']
[['locked', 'working.lock'], ['status', 'running'], ['status', 
'complete']]

['ID=', '9009', 'IID=', '87234785', ['execute', 'wait'], '-', 
'01.21.2007', ['status', 'wait'], ['waiting to lock'], ['status', 
'wait'], ['waiting on', 'ID=', '8688']]
['ID=', '9009', 'IID=', '87234785', ['execute', 'wait'], '-', 
'01.21.2007', ['status', 'wait'], ['waiting to lock'], ['status', 
'wait'], ['waiting on', 'ID=', '8688']]
- id: 9009
- iid: 87234785
- logtype: ['execute', 'wait']
- quals: [['waiting to lock'], ['status', 'wait'], ['waiting on', 
'ID=', '8688']]
- status: ['status', 'wait']
9009
87234785
['status', 'wait']
[['waiting to lock'], ['status', 'wait'], ['waiting on', 'ID=', 
'8688']]




More information about the Python-list mailing list