Searching through two logfiles in parallel?

darnold darnold992000 at yahoo.com
Tue Jan 8 14:16:09 EST 2013


I don't think in iterators (yet), so this is a bit wordy.
The basic idea is the same, though: for each message (set of parameters),
build a list of transactions consisting of matching send/receive times.

Lightly tested:


from datetime import datetime, timedelta

# Sample "send" log, one record per line:
#     <HH:MM:SS> Message sent - <parameters>  [#annotation]
# Text from '#' onward is an annotation that parse() strips; it must stay on
# the same line as its record, because a line without ' Message ' cannot be
# parsed.
sendData = '''\
    05:00:06 Message sent - Value A: 5.6, Value B: 6.2, Value C: 9.9
    05:00:08 Message sent - Value A: 3.3, Value B: 4.3, Value C: 2.3
    05:00:10 Message sent - Value A: 3.0, Value B: 0.4, Value C: 5.4  #orphan
    05:00:14 Message sent - Value A: 1.0, Value B: 0.4, Value C: 5.4
    07:00:14 Message sent - Value A: 1.0, Value B: 0.4, Value C: 5.4
'''

# Sample "receive" log, one record per line:
#     <HH:MM:SS> Message received - <parameters>  [#annotation]
# Each record must sit on a single line: the parameter text is used as the
# matching key, so a record split across lines would both fail to parse and
# lose its "Value C" figure.
receiveData = '''\
    05:00:09 Message received - Value A: 5.6, Value B: 6.2, Value C: 9.9
    05:00:12 Message received - Value A: 3.3, Value B: 4.3, Value C: 2.3
    05:00:15 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4
    07:00:18 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4
    07:00:30 Message received - Value A: 1.0, Value B: 0.4, Value C: 5.4   #orphan
    07:00:30 Message received - Value A: 17.0, Value B: 0.4, Value C: 5.4  #orphan
'''

def parse(line):
    """Split one log line into its timestamp and parameter text.

    The line is expected to look like
    ``<time> Message <action> - <params> [#comment]``.  The action word is
    discarded, everything from the first ``#`` onward is dropped, and both
    returned strings have surrounding whitespace stripped.

    Returns a ``(timestamp, params)`` tuple.  Raises ValueError (from the
    tuple unpacking) when the line does not split into exactly two pieces
    on ' Message ' and then on ' - '.
    """
    head, tail = line.split(' Message ')
    _action, payload = tail.split(' - ')
    # Keep only the text in front of any trailing '#...' annotation.
    payload = payload.partition('#')[0]
    return head.strip(), payload.strip()

def isMatch(sendTime, receiveTime, maxDelta):
    """Return True when receiveTime follows sendTime by at most maxDelta.

    sendTime    -- 'HH:MM:SS' string, or None for a transaction that has no
                   send (never matches).
    receiveTime -- 'HH:MM:SS' string.
    maxDelta    -- datetime.timedelta giving the longest acceptable delay.

    The timestamps carry no date, so both are compared as times on the same
    day; a send just before midnight is not matched to a receive just after.
    """
    if sendTime is None:
        return False

    sendDT = datetime.strptime(sendTime, '%H:%M:%S')
    receiveDT = datetime.strptime(receiveTime, '%H:%M:%S')
    elapsed = receiveDT - sendDT
    # A receive that precedes the send gives a negative delta, which would
    # pass a bare `<= maxDelta` test; require a non-negative delay as well.
    return timedelta(0) <= elapsed <= maxDelta

# Maps each parameter string to a list of transactions.  A transaction is a
# dict {'sendTime': ..., 'receiveTime': ...}; either side is None when no
# counterpart was found (an orphan).
results = {}

# Longest delay between a send and its receive for the two to be paired.
maxDelta = timedelta(seconds=5)

# Pass 1: every send opens a transaction that is still waiting for a receive.
for line in sendData.split('\n'):
    if not line.strip():
        continue

    timestamp, params = parse(line)
    results.setdefault(params, []).append(
        {'sendTime': timestamp, 'receiveTime': None})

# Pass 2: attach each receive to the first still-open send with the same
# parameters that lies within maxDelta; otherwise record it as an orphan.
for line in receiveData.split('\n'):
    if not line.strip():
        continue

    timestamp, params = parse(line)
    transactions = results.setdefault(params, [])
    for transaction in transactions:
        # Skip sends that already have a receive so that a second receive
        # inside the window does not overwrite (and lose) the first one.
        if (transaction['receiveTime'] is None and
                isMatch(transaction['sendTime'], timestamp, maxDelta)):
            transaction['receiveTime'] = timestamp
            break
    else:
        # No open send matched (or none exist for these parameters).
        transactions.append({'sendTime': None, 'receiveTime': timestamp})

# Report: parameters in sorted order, transactions in the order recorded.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for params in sorted(results):
    print(params)
    for transaction in results[params]:
        print('\t%s' % transaction)


>>> ================================ RESTART ================================
>>>
Value A: 1.0, Value B: 0.4, Value C: 5.4
	{'sendTime': '05:00:14', 'receiveTime': '05:00:15'}
	{'sendTime': '07:00:14', 'receiveTime': '07:00:18'}
	{'sendTime': None, 'receiveTime': '07:00:30'}
Value A: 17.0, Value B: 0.4, Value C: 5.4
	{'sendTime': None, 'receiveTime': '07:00:30'}
Value A: 3.0, Value B: 0.4, Value C: 5.4
	{'sendTime': '05:00:10', 'receiveTime': None}
Value A: 3.3, Value B: 4.3, Value C: 2.3
	{'sendTime': '05:00:08', 'receiveTime': '05:00:12'}
Value A: 5.6, Value B: 6.2, Value C: 9.9
	{'sendTime': '05:00:06', 'receiveTime': '05:00:09'}
>>>

HTH,
Don



More information about the Python-list mailing list