A newbie dictionary question

Jan Vroonhof vroonhof at frege.math.ethz.ch
Fri Nov 5 21:55:34 EST 1999


I have the distinct feeling this is FAQ, but if so I missed it. It is
late (or better early) here.
I am trying to teach  myself python with a little problem I have that
involves parsing ChangeLog files.

Thus I wrote a module that creates a dictionary-like class with the
text of the ChangeLog entry as the key. Later I want to use this to
check whether specific changes were applied. However I want to do the
comparisons regardless of whitespace. Thus I a made small class that
has __cpm__ and __hash__ functions that ignore whitespace.

However looking up objects in the hashes doesn't seem to work. Can
somebody tell me what obvious thing I am missing?

The code is below. Note that I added the complete module (which is 20
lines or so more than a trivial example) in the hope that somebody
comment on it stylewise as this is my first Python program.
In particular I have the feeling the while loops in the scan(self)
function could be done much clearer

#!/usr/bin/env python
"""This class implements a dictonary like class,
ChangeLog, that represents entries from ChangeLog file"""
import re
import sys
import string
from UserDict import UserDict

authpat = re.compile(
    r"^(\d\d\d\d-\d\d-\d\d|\w\w\w \w\w\w\s+\d+\s+\d\d:\d\d:\d\d \d\d\d\d)"
    +r"\s+([^<]+)\s+<(.+)>\s?$")
emptypat = re.compile(r"^\s*$")
startpat = re.compile("^(?:\t| {4})\\s*\\* (.*)")
contpat =  re.compile("^\\s*(?:\t| {4})\\s*(\\S.+)")

_debug = 0

class ChangeAuthor:
    def __init__(self,name,address):
        self.name = name
        self.address = address

    def __str__(self):
        return self.name + "<" + self.address + ">"
    
class ChangeEntry:
    def __init__(self,value):
        self.value = value
    
    def __str__(self):
        self.value

    # bad approximation?
    def nospaces(self):
        return string.join(string.split(self.value))

    def __cmp__(self,other):
        return self.nospaces() == other.nospaces()

    def __hash__(self):
        return hash(self.nospaces())
    

class ChangeLog(UserDict):        
    def scan(self,fd):
        author = None
        while 1:
            line = fd.readline()
            if author:
                if not line: break
                if emptypat.match(line): continue 
                while 1:
                    s = startpat.match(line)
                    if not s:
                        break
                    entry = s.group(1)
                    while 1:
                        line = fd.readline()
                        if not line: break
                        if emptypat.match(line): continue
                        s = contpat.match(line)
                        if not s: break
                        entry = entry + "\n"+ s.group(1)
                    if _debug: print entry
                    self[ChangeEntry(entry)] = author
                if _debug: print author
            if not line: return
            m = authpat.match(line)
            if m: author = ChangeAuthor(m.group(2),m.group(3))

    def scanfiles(self,files):
        for file in files:
            fd = open(file)
            self.scan(fd)
            fd.close()
        
    
            
def _test():
    global _debug
    _debug = 0
    args = sys.argv[1:]
    if not len(args):
        args = [ "ChangeLog"]
    clonce = ChangeLog()
    clonce.scanfiles(args);
    _debug = 0
    cltwice = ChangeLog()
    cltwice.scanfiles(args)

    # Verify  that the keys are actually equal and have the same has value
    # I am betting that for dictonaries constructed in the same way the order
    # of keys() is identical.
    for n in range(0,len(clonce.keys())):
        keyonce = clonce.keys()[n]
        keytwice = clonce.keys()[n]
        if keyonce != keytwice:
            raise RuntimeError, "Keys Different"
        if hash(keyonce) != hash(keytwice):
            raise RuntimeError, "Hash Different"

    for v in cltwice.keys():
        cltwice[v]
        clonce[v]   # This gives a KeyError, why?
         

if __name__ == '__main__':
	_test()





More information about the Python-list mailing list