File compare
Larry Bates
larry.bates at websafe.com
Wed Oct 12 19:26:11 EDT 2005
Sounds a little like "homework", but I'll help you out.
There are lots of ways, but this works.
import sys
class fobject:
    """Id-keyed view of a text file with one record per line.

    Each non-blank line is split at its first run of whitespace: the first
    token (with surrounding single quotes removed) becomes the id, and the
    remainder of the line becomes that id's value string.

    Attributes set by the constructor:
        lines    -- the raw lines as returned by readlines()
        datadict -- mapping of id -> remainder-of-line string
    """

    def __init__(self, inputfilename):
        """Read *inputfilename* and build ``self.datadict``.

        If the file cannot be opened or read, a message is printed and the
        process exits with status 3 (``sys.exit`` raises ``SystemExit``).
        """
        try:
            fp = open(inputfilename, 'r')
            # try/finally so the handle is released even if reading fails.
            try:
                self.lines = fp.readlines()
            finally:
                fp.close()
        except IOError:
            print("Unable to open and read inputfilename=%s" % inputfilename)
            sys.exit(3)

        self.datadict = {}
        for line in self.lines:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skipping them avoids a
                # ValueError when unpacking the split result.
                continue
            parts = line.split(None, 1)
            # Strip the quotes from the id itself ('AA' -> AA); stripping
            # the whole line would only remove the leading quote.
            key = parts[0].strip("'")
            if len(parts) > 1:
                values = parts[1]
            else:
                # A line holding only an id has an empty value string.
                values = ''
            self.datadict[key] = values

    def keys(self):
        """Return a list of the ids read from the file."""
        return list(self.datadict.keys())

    def compare(self, otherobject):
        """Return the fraction of *otherobject*'s ids whose values match ours.

        An id that is present in *otherobject* but missing here counts as a
        mismatch. If *otherobject* has no ids at all, 0.0 is returned.
        The result is a float between 0.0 and 1.0.
        """
        keys = otherobject.keys()
        if not keys:
            # Nothing to compare; avoid dividing by zero.
            return 0.0
        match = 0
        for key in keys:
            if key in self.datadict and \
               self.datadict[key] == otherobject.datadict[key]:
                match += 1
        return float(match) / float(len(keys))
if __name__ == "__main__":
    # Two file paths may be given on the command line; when they are not,
    # fall back to the sample files used in the original post.
    args = sys.argv[1:]
    if len(args) >= 2:
        path1, path2 = args[0], args[1]
    else:
        path1 = r'f:\syscon\python\zbkup\f1.txt'
        path2 = r'f:\syscon\python\zbkup\f2.txt'
    f1 = fobject(path1)
    f2 = fobject(path2)
    # Prints the fraction of f2's ids whose values match f1's. The
    # parenthesised form prints the same text on Python 2 and Python 3.
    print(f1.compare(f2))
Larry Bates
PyPK wrote:
> I have two files
> file1 in format
> <id> <val1> <test1> <test2>
> 'AA' 1 T T
> 'AB' 1 T F
>
> file2 same as file1
> <id> <val1> <test1> <test2>
> 'AA' 1 T T
> 'AB' 1 T T
>
> Also, the compare should be based on id. So it should look for the line
> starting with id 'AA' (for example) in the second file and then compare
> that line with the one from the first file.
>
> so this is what I am looking for:
> 1. read both files.
> 2. read id of first line in file1 check if it matches with the same id
> in file2.
> 3. repeat step 2 for all lines in file1.
> 4. return a percent of success to failure, i.e. if one line matches and
> one line doesn't, then return 0.5 or 50%
>
> I wrote a boolean version ..as a start
>
> def getdata(f):
> try:
> f1 = open(f,'r')
> data=[]
> for eachline in f1.readlines():
> data.append(re.split("",
> re.sub('\n','',strip(re.split('\s\s+',eachline)[0]))))
> return data
> except IOError:
> raise("Invalid File Input")
>
> if __name__=='__main__':
>
> data1 = getdata('file1')
> data2 = getdata('file2')
>
> if data1 == data2:
> print "True"
> else:
> print "False"
>
> hope I am clear...
>
More information about the Python-list
mailing list