log parser design question
Paul McGuire
ptmcg at austin.rr.com
Mon Jan 29 00:20:47 EST 2007
On Jan 27, 10:43 pm, avidfan <n... at nowhere.com> wrote:
> I need to parse a log file using python and I need some advice/wisdom
> on the best way to go about it:
>
> The log file entries will consist of something like this:
>
> ID=8688 IID=98889998 execute begin - 01.21.2007 status enabled
> locked working.lock
> status running
> status complete
>
> ID=9009 IID=87234785 execute wait - 01.21.2007 status wait
> waiting to lock
> status wait
> waiting on ID=8688
>
> and so on...
>
For parsing this data, here is a pyparsing approach. Once
parsed, the pyparsing ParseResults data structures can be massaged into
a queryable list. See the examples at the end for accessing the
individual parsed fields.
-- Paul
# Sample log text.  Each entry begins with an "ID=... IID=..." header line
# and is followed by zero or more qualifier lines.
data = """
ID=8688 IID=98889998 execute begin - 01.21.2007 status enabled
locked working.lock
status running
status complete
ID=9009 IID=87234785 execute wait - 01.21.2007 status wait
waiting to lock
status wait
waiting on ID=8688
"""

# Explicit names instead of a star import, so it is clear which pyparsing
# pieces the grammar relies on.
from pyparsing import (
    Group,
    Literal,
    Regex,
    Word,
    ZeroOrMore,
    alphanums,
    nums,
    oneOf,
)

# --- Basic tokens -----------------------------------------------------------
integer = Word(nums)
idref = "ID=" + integer.setResultsName("id")
iidref = "IID=" + integer.setResultsName("iid")
date = Regex(r"\d\d\.\d\d\.\d{4}")  # dates look like 01.21.2007

# --- Pieces of the header line ----------------------------------------------
logLabel = Group("execute" + oneOf("begin wait"))
logStatus = Group("status" + oneOf("enabled wait"))

# --- Qualifier lines that may follow a header line ---------------------------
lockQual = Group("locked" + Word(alphanums + "."))
waitingOnQual = Group("waiting on" + idref)
statusQual = Group("status" + oneOf("running complete wait"))
waitingToLockQual = Group(Literal("waiting to lock"))

# Parenthesized so the alternation can span several lines; the original
# statement was split after the trailing "|" by mail line-wrapping.
statusQualifier = (
    statusQual
    | waitingOnQual
    | waitingToLockQual
    | lockQual
)

# A complete log entry: header fields followed by any number of qualifiers.
# The named results ("id", "iid", "logtype", "status", "quals") are what the
# loop below reads back as attributes.
logEntry = (
    idref
    + iidref
    + logLabel.setResultsName("logtype")
    + "-"
    + date
    + logStatus.setResultsName("status")
    + ZeroOrMore(statusQualifier).setResultsName("quals")
)

# Print every entry found in the sample text, followed by its named fields.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
for tokens in logEntry.searchString(data):
    print(tokens)
    print(tokens.dump())
    print(tokens.id)
    print(tokens.iid)
    print(tokens.status)
    print(tokens.quals)
    print("")  # blank separator line between entries
prints:
['ID=', '8688', 'IID=', '98889998', ['execute', 'begin'], '-',
'01.21.2007', ['status', 'enabled'], ['locked', 'working.lock'],
['status', 'running'], ['status', 'complete']]
['ID=', '8688', 'IID=', '98889998', ['execute', 'begin'], '-',
'01.21.2007', ['status', 'enabled'], ['locked', 'working.lock'],
['status', 'running'], ['status', 'complete']]
- id: 8688
- iid: 98889998
- logtype: ['execute', 'begin']
- quals: [['locked', 'working.lock'], ['status', 'running'],
['status', 'complete']]
- status: ['status', 'enabled']
8688
98889998
['status', 'enabled']
[['locked', 'working.lock'], ['status', 'running'], ['status',
'complete']]
['ID=', '9009', 'IID=', '87234785', ['execute', 'wait'], '-',
'01.21.2007', ['status', 'wait'], ['waiting to lock'], ['status',
'wait'], ['waiting on', 'ID=', '8688']]
['ID=', '9009', 'IID=', '87234785', ['execute', 'wait'], '-',
'01.21.2007', ['status', 'wait'], ['waiting to lock'], ['status',
'wait'], ['waiting on', 'ID=', '8688']]
- id: 9009
- iid: 87234785
- logtype: ['execute', 'wait']
- quals: [['waiting to lock'], ['status', 'wait'], ['waiting on',
'ID=', '8688']]
- status: ['status', 'wait']
9009
87234785
['status', 'wait']
[['waiting to lock'], ['status', 'wait'], ['waiting on', 'ID=',
'8688']]
More information about the Python-list
mailing list