Difference between readlines() and iterating on a file object?

Hari Pulapaka hari at fake.com
Fri Aug 13 12:21:35 EDT 2004


Duncan Booth wrote:

> "Richard" <richardd at hmgcc.gov.uk> wrote in
> news:411cd102$1 at mail.hmgcc.gov.uk: 
> 
> 
>>Hi,
>>
>>Can anyone tell me what the difference is between
>>
>>for line in file.readlines( ):
> 
> 
> reads the entire file into memory and splits it up into a list of lines 
> then iterates over the list. If you break from the loop, tough you've lost 
> any lines that were read but you didn't handle.
> 
> 
>>and
>>
>>for line in file:
> 
> 
> reads part of the file and strips off one line at a time. Never creates a 
> list. Reads more only when it runs out of the block it read. If you break 
> from the loop you can do another 'for line in file' and get the remaining 
> lines.

However, one thing that bit me was that you can't use f.tell() to get the 
current position of the line in the file. If you use "for line in 
fileobject:" and the first statement in the loop body is 
fileobject.tell(), it will return the end-of-file position and not the 
position of the next line. Might be a bit counter-intuitive.

I am learning to be a better Python programmer and I have written this 
small program to parse mailbox files and display emails which match the 
specified text. Any comments on this will be appreciated. I know I can 
read the whole file using readlines(), but I am not sure if that is a 
good idea?


Batigol:~/pgrep hari$ cat pgrep.py
import sys

# Text that marks the first line of each message in the mailbox file.
emailstart = "From -"

# Number of matching emails indexed so far; also the next free key below.
count = 0

# Both tables are keyed by match number (0 .. count - 1):
#   hits[n]  -> [start_offset, end_offset] of the matching email in the file
#   lines[n] -> list of the lines in that email that contained the search text
hits, lines = {}, {}

def build(f, str):
     """Index every email in mailbox file *f* that contains the text *str*.

     The file is rewound and read line by line with readline() so that
     f.tell() always reports the offset of the line boundary (iterating the
     file object directly reads ahead and makes tell() unusable).

     An email starts on the line after one containing the module-level
     ``emailstart`` marker and runs up to the next such line, or to the end
     of the file.  For each email with at least one line containing *str*,
     the module-level tables are extended:

         hits[count]  = [start_offset, end_offset]
         lines[count] = [matching lines, in file order]

     and ``count`` is incremented.

     f   -- a seekable file object opened for reading.
     str -- the text to search for (the name shadows the builtin, but it is
            kept because it is part of the function's signature).
     """
     global count, hits, lines

     f.seek(0)
     start_email = 0      # offset of the first line after the latest marker
     str_matched = []     # matching lines seen in the current email

     while True:
         line_start = f.tell()
         line = f.readline()
         at_eof = not line

         if at_eof or emailstart in line:
             # The current email ends here.  Flushing at end of file as well
             # as at the next marker keeps matches in the last email.
             if str_matched:
                 hits[count] = [start_email, line_start]
                 lines[count] = str_matched
                 count += 1
                 str_matched = []
             if at_eof:
                 break
             # The new email's body begins right after the marker line.
             start_email = f.tell()

         if str in line:
             str_matched.append(line)

def display(f):
     """List the recorded matches and print the email the user selects.

     Reads the module-level ``count``, ``hits`` and ``lines`` tables.  If
     nothing was recorded, prints "Not found!" and exits the program with
     status 0.  Otherwise every matching line is listed prefixed by its
     email number, one number is read from stdin, and the corresponding
     region of *f* (hits[n][0] up to hits[n][1]) is copied to stdout.

     A reply that is not an integer, or is not a known email number,
     produces "Invalid choice" on stderr instead of a traceback.

     f -- the seekable file object the offsets in ``hits`` refer to.
     """
     if count == 0:
         sys.stdout.write("Not found! \n")
         sys.stdout.flush()
         sys.exit(0)

     sys.stdout.write("#: Line Contents\n")
     for i in range(count):
         for matched in lines[i]:
             sys.stdout.write("%s: %s" % (i, matched))

     sys.stdout.write("Enter # of email to display: ")
     sys.stdout.flush()
     answer = sys.stdin.readline()
     try:
         # Only the lookup is guarded, so real I/O errors and Ctrl-C are not
         # misreported as a bad choice.
         start, end = hits[int(answer.strip())]
     except (ValueError, KeyError):
         sys.stderr.write("Invalid choice\n")
     else:
         f.seek(start)
         while f.tell() != end:
             text = f.readline()
             if not text:
                 # End of file reached before the recorded end offset; stop
                 # rather than loop forever on empty reads.
                 break
             sys.stdout.write(text)

     sys.stdout.flush()

if __name__ == "__main__":
     # Usage: pgrep.py <mailbox-file> <search-text>
     # Both arguments are required; fail with a message, not an IndexError.
     if len(sys.argv) < 3:
         sys.stderr.write("Usage: %s <mailbox-file> <search-text>\n"
                          % sys.argv[0])
         sys.exit(1)

     try:
         # open() is used because the file() builtin exists only in Python 2.
         f = open(sys.argv[1], "r")
     except IOError:
         sys.stdout.write("Error opening file\n")
         sys.exit(1)

     try:
         build(f, sys.argv[2])
         response = 'n'
         # Keep showing the match list until the user answers anything
         # other than 'n' to the quit prompt.
         while response == 'n':
             display(f)
             sys.stdout.write("Do you want to quit, y or n? ")
             sys.stdout.flush()
             response = sys.stdin.readline().strip()
     finally:
         # Runs even when display() leaves via sys.exit().
         f.close()

     sys.exit(0)



Thanks,

Hari






More information about the Python-list mailing list