[Chicago] Python Development in Chicago

Fri Oct 26 16:07:55 CEST 2007

I'm doing some research in graph theory and I have a set of graphs (known as
the AT&T set) that is in a seemingly proprietary format - I want to swap it
into the graphml XML format.  I hacked together a little Python script for
this.  Would you folks say that this is 'pythonic', or does this look
newbie'ish?

____________________________________________________________________

import re, mmap, os

class token:
        def __init__(self):
                self.type = None
                self.data = None

class tokenizer:
        def __init__(self, inmap, out):
                self.inmap = inmap
                self.out = out

        def nextToken(self):
                line = inmap.readline()
                if re.search("^graph\s.*\s{$", line):
                        ident = line[6:len(line)-3]
                        result = token()
                        result.type = 'graph'
                        result.data = ident
                        return result
                elif re.search("^\s*subgraph.*{$", line):
                        parse = re.search("subgraph\s.*\s{$", line).group()
                        ident = line[10:len(line)-3]
                        result = token()
                        result.type = 'subgraph'
                        result.data = ident
                        return result
                elif re.search("^\s*}$", line):
                        result = token()
                        result.type = 'endgroup'
                        return result
                elif re.search("^\s*n\d+\s--\sn\d+;$", line):
                        parse = re.search("n\d+\s--\sn\d+", line).group()
                        split = parse.partition('--')
                        first = re.search("\d+", split[0]).group()
                        last = re.search("\d+", split[2]).group()
                        result = token()
                        result.type = 'edge'
                        result.data = [first,last]
                        return result
                return None

        def processToken(self, t):
                if not t:
                        return
                if t.type == 'graph':
                        self.out.write('<graph id="%s">\n' % t.data)
                elif t.type == 'subgraph':
                        self.out.write('<graph id="%s">\n' % t.data)
                        self.sg += 1
                elif t.type == 'endgroup':
                        self.out.write('</graph>\n')
                        if self.sg > 0:
                                self.sg -= 1
                elif t.type == 'edge':
                        self.out.write('<edge source="%s" target="%s"/>\n' %
(t.data[0], t.data[1]))

        def go(self):
                self.sg = 0
                self.out.write("""<?xml version="1.0" encoding="UTF-8"?>
<graphml>
""")
                while self.inmap.tell() <  self.inmap.size():
                        lex.processToken(lex.nextToken())

                self.out.write("</graphml>")

try:
        infile = "ug.txt"
        insize = os.path.getsize(infile)
        fd = open(infile, "r+")
        inmap = mmap.mmap(fd.fileno(), insize, None, mmap.ACCESS_READ)
        outfile = "out.txt"
        out = open(outfile, "r+")
        lex = tokenizer(inmap, out)
        lex.go()
except IOError:
        print "IO Error Occurred"
finally:
        inmap.close()
        out.close()
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.python.org/pipermail/chicago/attachments/20071026/c3a36cd5/attachment.htm