Reading variable length records...
salvasan at yahoo.com
salvasan at yahoo.com
Sat Sep 15 11:26:34 EDT 2001
"Bjorn Pettersen":
> I'm trying to read records from a 2 GB datafile ... The records are
> variable length and are separated by a five character delimiter.
The following Python class ought to do what you need. It handles the
partial-delimiter problem by using a buffer that grows as needed until a
complete delimiter has been seen.
# Python class for reading a variable length record file
# with fixed multi-character delimiter
class VLR_File:
    """Reader for a file of variable-length records ending in a fixed delimiter.

    Each call to read_record() returns the next record *including* its
    trailing delimiter, so a non-empty return value always means "here is a
    record" and the empty string always means "end of file".  A final
    record that is not followed by a delimiter is returned as-is.

    The file is read in blocks of ``chunk_size`` characters instead of one
    character at a time.  Text left over after the last complete record of
    a block stays in ``buffer`` and is prepended to the next block, so a
    delimiter that is split across two reads is still recognised.

    Instances can be used in a ``with`` statement to close the file
    automatically.
    """

    def __init__(self, filename, delimiter, chunk_size=65536):
        """Open *filename* for reading records terminated by *delimiter*.

        filename   -- path of the file to read (opened with open()'s
                      default mode)
        delimiter  -- string marking the end of each record; if it is empty
                      no delimiter is ever matched and the whole file is
                      returned as a single record
        chunk_size -- number of characters requested per read; must be >= 1

        Raises ValueError if chunk_size is less than 1, and whatever open()
        raises if the file cannot be opened.
        """
        # Validate before opening so a bad argument cannot leak a file handle.
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")
        self.fhand = open(filename)
        self.buffer = ""      # text read so far that holds no complete record
        self.delim = delimiter
        self.dlen = len(delimiter)
        self.eof = 0          # becomes 1 once the underlying file is exhausted
        self.chunk_size = chunk_size
        # Complete records already split out of a block but not yet returned.
        # Stored in reverse order so the next record is a cheap pop() away.
        self._pending = []

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; never hides errors."""
        self.close()

    def read_record(self):
        """Return the next record (with its delimiter), or "" at end of file."""
        if self._pending:
            return self._pending.pop()
        if self.eof:
            return ""
        while 1:
            chunk = self.fhand.read(self.chunk_size)
            if not chunk:
                # End of file: whatever is buffered is the last record.  It
                # has no trailing delimiter and may be empty.
                self.eof = 1
                record = self.buffer
                self.buffer = ""
                return record
            self.buffer = self.buffer + chunk
            # An empty delimiter never matches; the file is one big record.
            if self.dlen and self.delim in self.buffer:
                pieces = self.buffer.split(self.delim)
                # The last piece has no delimiter after it yet; keep it so it
                # can be completed by the next block.
                self.buffer = pieces.pop()
                pieces.reverse()
                self._pending = [piece + self.delim for piece in pieces]
                return self._pending.pop()

    def close(self):
        """Close the underlying file."""
        self.fhand.close()
# main program test: print every record of the sample file with its index
f = VLR_File("vlr_stuff.txt", "+++++")
try:
    rno = 0
    while 1:
        line = f.read_record()
        # read_record() returns "" only once the file is exhausted
        if not line:
            break
        # %-formatting inside print(...) produces the same "REC <n>" output
        # on both Python 2 and Python 3
        print("REC %d" % rno)
        print(line)
        rno = rno + 1
finally:
    # release the file handle even if reading or printing fails
    f.close()
More information about the Python-list
mailing list