Reading variable length records...

salvasan at yahoo.com salvasan at yahoo.com
Sat Sep 15 11:26:34 EDT 2001


"Bjorn Pettersen":
> I'm trying to read records from a 2 GB datafile ... The records are
> variable length and are separated by a five character delimiter.

The following Python class ought to do what you need. It handles the
partial-delimiter problem by accumulating input in a buffer that grows as
needed, and it returns each record with its trailing delimiter still attached.


# Python class for reading a variable length record file
# with fixed multi-character delimiter
class VLR_File:
    """Sequential reader for a file of delimiter-terminated records.

    Records are returned one at a time by read_record(), each with its
    trailing delimiter still attached.  Any data after the final delimiter
    is returned as the last record.  Input is read in chunks rather than
    one character at a time, so large files can be scanned efficiently.
    """

    def __init__(self, filename, delimiter, chunk_size=65536):
        """Open *filename* for reading.

        filename   -- path of the file to read (opened in text mode).
        delimiter  -- string that terminates each record.  If it is empty,
                      the whole file is returned as a single record.
        chunk_size -- number of characters requested per read; must be
                      at least 1.

        Raises ValueError if chunk_size is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")
        self.fhand = open(filename)
        self.buffer = ""          # data read from the file but not yet returned
        self.delim = delimiter
        self.dlen = len(delimiter)
        self.eof = 0              # set to 1 once the file is exhausted
        self.chunk_size = chunk_size

    def read_record(self):
        """Return the next record, including its trailing delimiter.

        At end of file the remaining buffered data (possibly "") is
        returned as the last record; every later call returns "".
        """
        if self.eof:
            return ""
        search_from = 0
        while 1:
            if self.dlen:
                pos = self.buffer.find(self.delim, search_from)
                if pos >= 0:
                    end = pos + self.dlen
                    record = self.buffer[:end]
                    # keep whatever follows the delimiter for the next call
                    self.buffer = self.buffer[end:]
                    return record
                # No delimiter yet.  Only the last dlen-1 characters can be
                # the start of a delimiter completed by the next chunk, so
                # resume the search there instead of rescanning everything.
                search_from = max(0, len(self.buffer) - self.dlen + 1)
            chunk = self.fhand.read(self.chunk_size)
            if not chunk:
                # end of file -> return current buffer contents as last record
                self.eof = 1
                record = self.buffer
                self.buffer = ""
                return record
            self.buffer = self.buffer + chunk

    def close(self):
        """Close the underlying file."""
        self.fhand.close()


# main program test: print every record of vlr_stuff.txt, numbered from 0
f = VLR_File("vlr_stuff.txt", "+++++")
try:
    rno = 0
    while 1:
        line = f.read_record()
        # read_record() returns "" once the file is exhausted
        if not line:
            break
        # single-argument print(...) gives the same output on Python 2 and 3
        print("REC %d" % rno)
        print(line)
        rno = rno + 1
finally:
    # release the file handle even if reading fails part-way through
    f.close()






More information about the Python-list mailing list