A newbie dictionary question
Jan Vroonhof
vroonhof at frege.math.ethz.ch
Fri Nov 5 21:55:34 EST 1999
I have the distinct feeling this is FAQ, but if so I missed it. It is
late (or better early) here.
I am trying to teach myself python with a little problem I have that
involves parsing ChangeLog files.
Thus I wrote a module that creates a dictionary-like class with the
text of the ChangeLog entry as the key. Later I want to use this to
check whether specific changes were applied. However I want to do the
comparisons regardless of whitespace. Thus I a made small class that
has __cpm__ and __hash__ functions that ignore whitespace.
However looking up objects in the hashes doesn't seem to work. Can
somebody tell me what obvious thing I am missing?
The code is below. Note that I added the complete module (which is 20
lines or so more than a trivial example) in the hope that somebody
comment on it stylewise as this is my first Python program.
In particular I have the feeling the while loops in the scan(self)
function could be done much clearer
#!/usr/bin/env python
"""This class implements a dictonary like class,
ChangeLog, that represents entries from ChangeLog file"""
import re
import sys
import string
from UserDict import UserDict
authpat = re.compile(
r"^(\d\d\d\d-\d\d-\d\d|\w\w\w \w\w\w\s+\d+\s+\d\d:\d\d:\d\d \d\d\d\d)"
+r"\s+([^<]+)\s+<(.+)>\s?$")
emptypat = re.compile(r"^\s*$")
startpat = re.compile("^(?:\t| {4})\\s*\\* (.*)")
contpat = re.compile("^\\s*(?:\t| {4})\\s*(\\S.+)")
_debug = 0
class ChangeAuthor:
def __init__(self,name,address):
self.name = name
self.address = address
def __str__(self):
return self.name + "<" + self.address + ">"
class ChangeEntry:
def __init__(self,value):
self.value = value
def __str__(self):
self.value
# bad approximation?
def nospaces(self):
return string.join(string.split(self.value))
def __cmp__(self,other):
return self.nospaces() == other.nospaces()
def __hash__(self):
return hash(self.nospaces())
class ChangeLog(UserDict):
def scan(self,fd):
author = None
while 1:
line = fd.readline()
if author:
if not line: break
if emptypat.match(line): continue
while 1:
s = startpat.match(line)
if not s:
break
entry = s.group(1)
while 1:
line = fd.readline()
if not line: break
if emptypat.match(line): continue
s = contpat.match(line)
if not s: break
entry = entry + "\n"+ s.group(1)
if _debug: print entry
self[ChangeEntry(entry)] = author
if _debug: print author
if not line: return
m = authpat.match(line)
if m: author = ChangeAuthor(m.group(2),m.group(3))
def scanfiles(self,files):
for file in files:
fd = open(file)
self.scan(fd)
fd.close()
def _test():
global _debug
_debug = 0
args = sys.argv[1:]
if not len(args):
args = [ "ChangeLog"]
clonce = ChangeLog()
clonce.scanfiles(args);
_debug = 0
cltwice = ChangeLog()
cltwice.scanfiles(args)
# Verify that the keys are actually equal and have the same has value
# I am betting that for dictonaries constructed in the same way the order
# of keys() is identical.
for n in range(0,len(clonce.keys())):
keyonce = clonce.keys()[n]
keytwice = clonce.keys()[n]
if keyonce != keytwice:
raise RuntimeError, "Keys Different"
if hash(keyonce) != hash(keytwice):
raise RuntimeError, "Hash Different"
for v in cltwice.keys():
cltwice[v]
clonce[v] # This gives a KeyError, why?
if __name__ == '__main__':
_test()
More information about the Python-list
mailing list