File not read to end
andrew.jefferies at gmail.com
andrew.jefferies at gmail.com
Thu Apr 26 08:19:10 EDT 2007
On Apr 25, 2:51 pm, Larry Bates <larry.ba... at websafe.com> wrote:
> andrew.jeffer... at gmail.com wrote:
> > Hi,
>
> > I'm trying to write a simple log parsing program. I noticed that it
> > isn't reading my log file to the end.
>
> > My log is around 200,000 lines but it is stopping at line 26,428. I
> > checked that line and there aren't any special characters.
>
> > This is the file reading code segment that I'm using:
> > sysFile=open(sysFilename,'r')
> > lineCount = 0
> > for line in sysFile:
> > lineCount +=1
> > print str(lineCount) + " -- " + line
>
> > I also stuck this same code bit into a test script and it was able to
> > parse the entire log without problem. Very quirky.
>
> > This is my first foray from Perl to Python so I appreciate any help.
>
> > Thanks in advance.
>
> > --Andrew
>
> Show us more of your surrounding code so we have some chance of figuring
> out why this working code stops. There's nothing wrong with this code,
> the problem is somewhere else.
>
> Suggestion:
>
> lineCount = 0
> for line in sysFile:
> lineCount +=1
> print str(lineCount) + " -- " + line
>
> can be written:
>
> for lineCount, line in enumerate(sysFile):
> print "%i--%s" % (lineCount, line)
>
> -Larry
Hi Larry,
I've attached the whole script. Thanks again for your help.
--Andrew
import getopt, sys, re, os
def main():
try:# Get options for processing
o, a = getopt.getopt(sys.argv[1:], 'a:d:hl')
except getopt.GetoptError:
# print help information and exit:
usage()
sys.exit(2)
opts = {}
for k,v in o: #Parse parameters into
hash
opts[k] = v
#make sure that all of the needed options are included
if opts.has_key('-h'): #Return help for -
h
usage()
sys.exit(0)
if opts.has_key('-l'):
pathname = opts['-l']
if not (opts.has_key('-a')):
usage()
sys.exit()
else:
address=opts['-a']
if not (opts.has_key('-d')):
usage()
sys.exit()
if not (opts.has_key('-l')): # Use current path if not provided
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
# Get file names and open files
sysFilename = os.path.abspath(pathname) + "\sys" + opts["-d"] +
".txt"
#logFilename = opts["-l"] + "\log" + opts["-d"] + ".txt"
spamFilename = os.path.abspath(pathname) + "\spam" + opts["-d"] +
".log"
print "Loading Files:\n" + sysFilename + "\n" + spamFilename +
"\n"
try: #Open log files
sysFile=open(sysFilename,'rb')
#logFile=open(logFilename,'r')
spamFile=open(spamFilename,'rb')
except:
print "could not open file for reading" , sys.exc_info()[0]
sys.exit()
ToAddr = {} # This will hold Messages TO the address
FrAddr = {} # This will hold Messages FROM the address
numFound = 0 # For Testing
notFound = 0 # For Testing
lineCount = 0 # For Testing
#Read file and get message IDs that correspond to the searched
address
for line in sysFile:
lineCount +=1 # For Testing
# print str(lineCount) + " -- " + line
daRegex = re.compile(address)
if daRegex.search(line): #Found address in line - Continue
processing
#re.search(address,line): #If line has address
print line + "\n" # For Testing
numFound +=1 # For Testing
if re.search('MAIL FROM:',line): #Add it (message id) to
the From list if needed
MID = getMID(line)
if FrAddr.has_key(MID):
break
else:
FrAddr[MID]=""
#print "From: " + MID + "\n"
elif re.search('RCPT TO:',line): #Add it (message id) to
the To list if needed
MID = getMID(line)
if ToAddr.has_key(MID):
break
else:
ToAddr[MID]=""
else:
notFound +=1 #For Testing
# Close and re-open file for re-processes (there is probably a
better way to do this)
sysFile.close
sysFile=open(sysFilename,'r')
for line in sysFile: # Get all messages with message IDs that have
been found
MID = getMID(line)
if FrAddr.has_key(MID):
FrAddr[MID]+=line
# print line + "\n"
elif ToAddr.has_key(MID):
ToAddr[MID]+=line
sysFile.close
for line in spamFile: # Get similar messages from spam file
MID = getMID(line)
if FrAddr.has_key(MID):
FrAddr[MID]+='SPAM>>>'+ line
elif ToAddr.has_key(MID):
ToAddr[MID]+='SPAM>>>'+ line
spamFile.close
#open output files
fname = pathname + "\\" + address + ".txt"
fout = open(fname,'w')
# Output and format
for key in FrAddr.keys():
fout.write("<<<<<<< FROM "+ address+ " Message ID "+ key
+ "------------\n")
fout.write(FrAddr[key]+"\n")
for key in ToAddr.keys():
fout.write(">>>>>>> To "+ address+ " Message ID "+ key
+ "------------\n")
fout.write(ToAddr[key]+"\n")
print "------------------- Done processing
---------------------"
print "Found: " + str(numFound) #Test
print "Not matching: " + str(notFound) #Test
print "Line Cound: " + str(lineCount) #test
fout.close
def getMID(daLine): #Extracts the message ID from the message
p = re.compile("\(.*?\)")
pid=p.search(daLine)
if pid:
id=pid.group()
id=id.lstrip('\(')
id=id.rstrip('\)')
#print id
return id
else:
return
def usage(): # Provides usage feedback
print """
Syntax:
-a email account to find
-l location of log files (OPTIONAL)
-d date, in file date format (####)
"""
if __name__ == "__main__": # Call mail loop
main()
More information about the Python-list
mailing list