File not read to end

andrew.jefferies at gmail.com andrew.jefferies at gmail.com
Thu Apr 26 08:19:10 EDT 2007


On Apr 25, 2:51 pm, Larry Bates <larry.ba... at websafe.com> wrote:
> andrew.jeffer... at gmail.com wrote:
> > Hi,
>
> > I'm trying to write a simple log parsing program. I noticed that it
> > isn't reading my log file to the end.
>
> > My log is around 200,000 lines but it is stopping at line 26,428. I
> > checked that line and there aren't any special characters.
>
> > This is the file reading code segment that I'm using:
> >     sysFile=open(sysFilename,'r')
> >     lineCount = 0
> >     for line in sysFile:
> >         lineCount +=1
> >         print str(lineCount) + " -- " + line
>
> > I also stuck this same code bit into a test script and it was able to
> > parse the entire log without problem. Very quirky.
>
> > This is my first foray from Perl to Python so I appreciate any help.
>
> > Thanks in advance.
>
> > --Andrew
>
> Show us more of your surrounding code so we have some chance of figuring
> out why this working code stops.  There's nothing wrong with this code,
> the problem is somewhere else.
>
> Suggestion:
>
>     lineCount = 0
>     for line in sysFile:
>         lineCount +=1
>         print str(lineCount) + " -- " + line
>
> can be written:
>
>     for lineCount, line in enumerate(sysFile):
>         print "%i--%s" % (lineCount, line)
>
> -Larry

Hi Larry,

I've attached the whole script. Thanks again for your help.

--Andrew


import getopt, sys, re, os

def main():
    try:# Get options for processing
        o, a = getopt.getopt(sys.argv[1:], 'a:d:hl')
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)
    opts = {}
    for k,v in o:    #Parse parameters into
hash
        opts[k] = v
    #make sure that all of the needed options are included
	if opts.has_key('-h'):   #Return help for -
h
		usage()
		sys.exit(0)
    if opts.has_key('-l'):
        pathname = opts['-l']
    if not (opts.has_key('-a')):
        usage()
        sys.exit()
    else:
        address=opts['-a']
    if not (opts.has_key('-d')):
        usage()
        sys.exit()
    if not (opts.has_key('-l')): # Use current path if not provided
        pathname = os.path.abspath(os.path.dirname(sys.argv[0]))

    # Get file names and open files
    sysFilename = os.path.abspath(pathname) + "\sys" + opts["-d"] +
".txt"
    #logFilename = opts["-l"] + "\log" + opts["-d"] + ".txt"
    spamFilename =  os.path.abspath(pathname) + "\spam" + opts["-d"] +
".log"
    print "Loading Files:\n" + sysFilename + "\n" + spamFilename +
"\n"
    try:	#Open log files
        sysFile=open(sysFilename,'rb')
        #logFile=open(logFilename,'r')
        spamFile=open(spamFilename,'rb')
    except:
        print "could not open file for reading" , sys.exc_info()[0]
        sys.exit()
    ToAddr = {} # This will hold Messages TO the address
    FrAddr = {} # This will hold Messages FROM the address
    numFound = 0 # For Testing
    notFound = 0 # For Testing
    lineCount = 0 # For Testing
    #Read file and get message IDs that correspond to the searched
address
    for line in sysFile:
        lineCount +=1 # For Testing
       # print str(lineCount) + " -- " + line
        daRegex = re.compile(address)
        if daRegex.search(line): #Found address in line - Continue
processing
        #re.search(address,line): #If line has address
            print line + "\n" # For Testing
            numFound +=1 # For Testing
            if re.search('MAIL FROM:',line): #Add it (message id) to
the From list if needed
                MID = getMID(line)
                if FrAddr.has_key(MID):
                    break
                else:
                    FrAddr[MID]=""
                    #print "From: " + MID + "\n"
            elif re.search('RCPT TO:',line): #Add it (message id) to
the To list if needed
                MID = getMID(line)
                if ToAddr.has_key(MID):
                    break
                else:
                   ToAddr[MID]=""
        else:
            notFound +=1  #For Testing
    # Close and re-open file for re-processes (there is probably a
better way to do this)
    sysFile.close
    sysFile=open(sysFilename,'r')

    for line in sysFile: # Get all messages with message IDs that have
been found
        MID = getMID(line)
        if FrAddr.has_key(MID):
            FrAddr[MID]+=line
          #  print line + "\n"
        elif ToAddr.has_key(MID):
            ToAddr[MID]+=line
    sysFile.close

    for line in spamFile: # Get similar messages from spam file
        MID = getMID(line)
        if FrAddr.has_key(MID):
            FrAddr[MID]+='SPAM>>>'+ line
        elif ToAddr.has_key(MID):
            ToAddr[MID]+='SPAM>>>'+ line
    spamFile.close

#open output files
    fname = pathname + "\\" + address + ".txt"
    fout = open(fname,'w')


# Output and format
    for key in FrAddr.keys():
        fout.write("<<<<<<< FROM   "+ address+ "    Message ID  "+ key
+ "------------\n")
        fout.write(FrAddr[key]+"\n")
    for key in ToAddr.keys():
        fout.write(">>>>>>> To     "+ address+ "    Message ID  "+ key
+ "------------\n")
        fout.write(ToAddr[key]+"\n")

    print "------------------- Done processing
---------------------"
    print "Found:  " + str(numFound) #Test
    print "Not matching:  " + str(notFound) #Test
    print "Line Cound:  " + str(lineCount) #test
    fout.close
def getMID(daLine): #Extracts the message ID from the message
    p = re.compile("\(.*?\)")
    pid=p.search(daLine)
    if pid:
        id=pid.group()
        id=id.lstrip('\(')
        id=id.rstrip('\)')
        #print id
        return id
    else:
        return

def usage(): # Provides usage feedback
    print """
	Syntax:
    -a          email account to find
    -l          location of log files (OPTIONAL)
    -d          date, in file date format (####)
    """


if __name__ == "__main__":	# Call mail loop
    main()





More information about the Python-list mailing list