Searching through two logfiles in parallel?
darnold
darnold992000 at yahoo.com
Tue Jan 8 14:16:09 EST 2013
I don't think in iterators (yet), so this is a bit wordy.
The basic idea is the same, though: for each message (set of parameters),
build a list of transactions consisting of matching send/receive times.
Lightly tested:
from datetime import datetime, timedelta
# Sample "send" log: one record per line, in the form
#   HH:MM:SS Message sent - <parameters> [#annotation]
# Each record must stay on a single line; parse() rejects fragments.
# The 05:00:10 record is the send that never gets a matching receive.
sendData = '''\
05:00:06 Message sent - Value A: 5.6, Value B: 6.2, Value C: 9.9
05:00:08 Message sent - Value A: 3.3, Value B: 4.3, Value C: 2.3
05:00:10 Message sent - Value A: 3.0, Value B: 0.4, Value C: 5.4 #orphan
05:00:14 Message sent - Value A: 1.0, Value B: 0.4, Value C: 5.4
07:00:14 Message sent - Value A: 1.0, Value B: 0.4, Value C: 5.4
'''
# Sample "receive" log: one record per line, in the form
#   HH:MM:SS Message received - <parameters> [#annotation]
# Each record must stay on a single line; parse() rejects fragments.
# The two 07:00:30 records are receives with no matching send.
receiveData = '''\
05:00:09 Message received - Value A: 5.6, Value B: 6.2, Value C: 9.9
05:00:12 Message received - Value A: 3.3, Value B: 4.3, Value C: 2.3
05:00:15 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4
07:00:18 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4
07:00:30 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4 #orphan
07:00:30 Message received - Value A: 17.0, Value B: 0.4, Value C: 5.4 #orphan
'''
def parse(line):
    """Split one log line into (timestamp, params).

    The line is expected to look like
    ``<timestamp> Message <action> - <params> [#annotation]``.
    The action word is discarded, anything from the first ``#`` onward is
    dropped, and both returned strings are stripped of surrounding
    whitespace.

    Raises ValueError if the line lacks the ' Message ' or ' - ' separator.
    """
    # Split only on the first occurrence of each separator so that the same
    # text appearing later, inside the parameters, cannot break the unpacking.
    timestamp, rest = line.split(' Message ', 1)
    _action, params = rest.split(' - ', 1)
    # A trailing '#...' is an annotation, not part of the parameters.
    params = params.split('#', 1)[0]
    return timestamp.strip(), params.strip()
def isMatch(sendTime, receiveTime, maxDelta):
    """Return True if receiveTime follows sendTime by at most maxDelta.

    sendTime    -- 'HH:MM:SS' string, or None for a record with no send
                   (nothing can match it, so the result is False).
    receiveTime -- 'HH:MM:SS' string.
    maxDelta    -- datetime.timedelta, the longest acceptable delay.

    The timestamps carry no date. A receive time that is earlier on the
    clock than the send time is therefore treated as falling on the next
    day (e.g. 23:59:58 -> 00:00:01 is a 3 second delay) rather than as a
    negative delay, which would otherwise always satisfy ``<= maxDelta``.
    """
    if sendTime is None:
        return False
    timeFormat = '%H:%M:%S'
    elapsed = (datetime.strptime(receiveTime, timeFormat)
               - datetime.strptime(sendTime, timeFormat))
    if elapsed < timedelta(0):
        # Receive is earlier on the clock: assume it wrapped past midnight.
        elapsed += timedelta(days=1)
    return elapsed <= maxDelta
# Longest delay allowed between a send and the receive that answers it.
maxDelta = timedelta(seconds=5)

# results maps each parameter string to a list of transactions; every
# transaction is a dict {'sendTime': ..., 'receiveTime': ...} in which a
# value of None marks the missing half of an unmatched (orphan) record.
results = {}

# Pass 1: every send opens a new transaction that has no receive yet.
for line in sendData.splitlines():
    if not line.strip():
        continue
    timestamp, params = parse(line)
    results.setdefault(params, []).append(
        {'sendTime': timestamp, 'receiveTime': None})

# Pass 2: attach each receive to the first still-open send with the same
# parameters that it follows closely enough; otherwise record it as an orphan.
for line in receiveData.splitlines():
    if not line.strip():
        continue
    timestamp, params = parse(line)
    transactions = results.setdefault(params, [])
    for transaction in transactions:
        # A send that already has its receive must not be claimed again,
        # or a later receive would silently overwrite the earlier one.
        if transaction['receiveTime'] is not None:
            continue
        if isMatch(transaction['sendTime'], timestamp, maxDelta):
            transaction['receiveTime'] = timestamp
            break
    else:
        # No open send matched (the loop ended without a break).
        transactions.append({'sendTime': None, 'receiveTime': timestamp})

# Report the transactions grouped by parameter string, in sorted order.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for params in sorted(results):
    print(params)
    for transaction in results[params]:
        print('\t%s' % transaction)
>>> ================================ RESTART ================================
>>>
Value A: 1.0, Value B: 0.4, Value C: 5.4
{'sendTime': '05:00:14', 'receiveTime': '05:00:15'}
{'sendTime': '07:00:14', 'receiveTime': '07:00:18'}
{'sendTime': None, 'receiveTime': '07:00:30'}
Value A: 17.0, Value B: 0.4, Value C: 5.4
{'sendTime': None, 'receiveTime': '07:00:30'}
Value A: 3.0, Value B: 0.4, Value C: 5.4
{'sendTime': '05:00:10', 'receiveTime': None}
Value A: 3.3, Value B: 4.3, Value C: 2.3
{'sendTime': '05:00:08', 'receiveTime': '05:00:12'}
Value A: 5.6, Value B: 6.2, Value C: 9.9
{'sendTime': '05:00:06', 'receiveTime': '05:00:09'}
>>>
HTH,
Don
More information about the Python-list
mailing list