[Tutor] Re: one more question on a word in a specific place in a line

Christopher Smith csmith@blakeschool.org
Sat, 20 Oct 2001 14:51:55 -0500


>
>One way of doing this is separating each record into items in a list. You
>could separate around the blank lines like so:

A more general alternative is to access the file as a
"record" file with something like the class that Bjorn Peterson
previously discussed.  Doing so allows you to read records
separated by an arbitrary delimiter just like you would read
lines from a file:

# Example: print a fixed slice of the 2nd line of every record in a file
# whose records are separated by blank lines.
filename = 'mydatfile'
delim = '\n\n'
f = rfile(filename, delim)  # instead of f = open(filename, 'r')
try:
    while 1:
        rec = f.read_record()
        if rec is None:  # None signals that all records have been read
            break
        lines = rec.split('\n')
        # A record may consist of a single line (or be empty); skip it
        # rather than fail with an IndexError on lines[1].
        if len(lines) > 1:
            print(lines[1][:3])  # the first 3 characters on the 2nd line
finally:
    # Release the file handle even if processing a record fails.
    f.close()

My modification of Bjorn's class along with a read_records()
method is given below.

/c

#
# Python class for reading a variable-length record file
# with a fixed multi-character delimiter; a combination of
# Bjorn Peterson's class and read_record with my read_records
#
# cps 9/17/01
#
class rfile:
    """Read a text file as a sequence of delimiter-separated records.

    Works like reading lines from a file, except that records are
    separated by an arbitrary fixed (possibly multi-character) delimiter
    instead of a newline.  Records are returned without the delimiter.

    Records can be fetched one at a time with read_record() or all at
    once with read_records(); the two may be mixed, in which case
    read_records() returns whatever has not been returned yet.  An empty
    piece after the final delimiter (or an entirely empty file) does not
    count as a record in either method.

    Attributes:
        fhand:   the underlying open file object.
        bufsize: size passed to each read() call made by read_record().
        buffer:  text already read from the file but not yet returned.
        delim:   the record delimiter.
        dlen:    len(delim).
        eof:     1 once every record has been handed out, else 0.
    """

    def __init__(self, filename, delimiter, bufsize=100000):
        """Open *filename* for reading records separated by *delimiter*.

        Raises:
            ValueError: if *delimiter* is empty (an empty delimiter can
                never advance through the data).
        """
        # Validate before opening so a bad argument does not leak a handle.
        if not delimiter:
            raise ValueError("delimiter must be a non-empty string")
        self.fhand = open(filename)
        self.bufsize = bufsize
        self.buffer = ""
        self.delim = delimiter
        self.dlen = len(delimiter)
        self.eof = 0

    def __enter__(self):
        """Allow use as a context manager: ``with rfile(...) as f:``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying file when leaving a ``with`` block."""
        self.close()

    def read_record(self):
        """Return the next record (without its delimiter), or None.

        None is returned once all records have been consumed.  Text after
        the last delimiter is returned as a final record only if it is
        non-empty, matching what read_records() does.
        """
        if self.eof:
            return None
        search_from = 0
        while 1:
            cut = self.buffer.find(self.delim, search_from)
            if cut != -1:
                break
            chunk = self.fhand.read(self.bufsize)
            if not chunk:
                # End of file: whatever is left is the last record.
                self.eof = 1
                tail = self.buffer
                self.buffer = ""
                return tail or None
            # Only the new chunk needs scanning, but a delimiter may
            # straddle the boundary, so back up by dlen - 1 characters.
            search_from = max(0, len(self.buffer) - self.dlen + 1)
            self.buffer += chunk
        record = self.buffer[:cut]
        self.buffer = self.buffer[cut + self.dlen:]
        return record

    def read_records(self):
        """Return a list of all records that have not been returned yet.

        Returns an empty list if every record was already consumed.
        After this call the reader is at end of file.
        """
        # This is only necessary in case this is called after
        # already getting all the records.
        if self.eof:
            return []
        # Start from text buffered by earlier read_record() calls so that
        # mixing the two methods does not drop data.
        data = self.buffer + self.fhand.read()
        self.buffer = ""
        self.eof = 1
        records = data.split(self.delim)
        # split() always yields a last piece; it is a real record only if
        # there is text after the final delimiter.
        if not records[-1]:
            records.pop()
        return records

    def close(self):
        """Close the underlying file."""
        self.fhand.close()


# main program test: dump the records of the file 'try' (records are
# separated by blank lines), first all at once and then one at a time.
# Guarded so that importing this module does not run the test.
if __name__ == '__main__':
    filename = 'try'
    delim = '\n\n'

    # read them all at once
    f = rfile(filename, delim)
    try:
        recs = f.read_records()
    finally:
        f.close()
    print(recs)

    # or read them one at a time
    f = rfile(filename, delim)
    try:
        rno = 0
        while 1:
            line = f.read_record()
            if line is None:  # None signals that all records have been read
                break
            # Single-argument print() calls produce the same output on
            # Python 2 and Python 3.
            print("REC %d" % rno)
            print(line)
            rno = rno + 1
    finally:
        f.close()