.py to sqlite translator [1 of 2]

Guilherme Polo ggpolo at gmail.com
Fri Oct 26 09:49:31 EDT 2007


2007/10/26, smitty1e <smitty1e at gmail.com>:
> Disclaimer(s): the author is nobody's pythonista.  This could probably
> be done more elegantly.
> The driver for the effort is to get PyMacs to work with new-style
> classes.
> This rendering stage stands alone, and might be used for other
> purposes.
> A subsequent post will show using the resulting file to produce (I
> think valid) .el trampoline
> signatures for PyMacs.
> If nothing else, it shows some python internals in an interesting way.
> Tested against version 2.5.1
> Maybe "lumberjack.py" would be a better name, since "It cuts down
> trees, goes real slow, and uses disk galore.  Wishes it'd been
> webfoot[1], just like its dear author".
> Cheers,
> Chris
>
> [1] Author was born in Oregon.
>
> #A sample file:
> class sample( object ):
>     """fairly trivial sample class for demonstration purposes.
>     """
>     def __init__( self
>                 , some_string ):
>         self.hold_it = some_string
>
>     def show( self ):
>         print self.hold_it
>
> #Invocation:
> # ./pysqlrender.py -f sample.py -o output
>
> #Script:
> #!/usr/bin/python
>
> """Script to dump the parse tree of an input file to a SQLite
> database.
> """
>
> from   optparse import OptionParser
> import os
> import parser
> import pprint
> import re
> import sqlite3
> import symbol
> import token
> import types
>
> from   types import ListType \
>                   , TupleType
>
> target_table  = """CREATE TABLE tbl_parse_tree        (
>                      parse_tree_id     INTEGER PRIMARY KEY
> AUTOINCREMENT
>                    , parse_tree_symbol_id
>                    , parse_tree_indent
>                    , parse_tree_value  );"""
>
> target_insert = """INSERT INTO tbl_parse_tree        (
>                      parse_tree_symbol_id
>                    , parse_tree_indent
>                    , parse_tree_value )
>                    VALUES         (%s,  %s, '%s' );"""
>
> symbol_table  = """CREATE TABLE tlp_parse_tree_symbol (
>                      parse_tree_symbol_id INTEGER PRIMARY KEY
>                    , parse_tree_symbol_val   );"""
> symbol_insert = """INSERT INTO tlp_parse_tree_symbol (
>                      parse_tree_symbol_id
>                    , parse_tree_symbol_val )
>                    VALUES ( %s, '%s' );"""
>
> class symbol_manager( object ):
>     """ Class to merge symbols and tokens for ease of use.
>     """
>     def __init__( self
>                 , c    ):
>         for k in symbol.sym_name:
>             sql = symbol_insert % ( k, symbol.sym_name[k] )
>             try:
>                 c.execute( sql )
>             except sqlite3.IntegrityError:
>                 pass
>         for k in token.tok_name:
>             sql = symbol_insert % ( k, token.tok_name[k] )
>             try:
>                 c.execute( sql )
>             except sqlite3.IntegrityError:
>                 pass
>
>     def get_symbol( self
>                   , key  ):
>         ret = -1
>         if   symbol.sym_name.has_key(key): ret = symbol.sym_name[key]
>         elif token.tok_name.has_key(key) : ret = token.tok_name[ key]
>         return ret
>
>     def recurse_it( self, tester ):
>         """Check to see if dump_tup should recurse
>         """
>         if self.get_symbol(tester) > 0:
>             return True
>         return False
>
> class stocker( object ):
>     """Remembers the depth of the tree and effects the INSERTs
>        into the output file.
>     """
>     def __init__( self ):
>         self.cur_indent = 0
>
>     def do_symbol( self
>                  , c
>                  , symbol_value
>                  , val  = "" ):
>         """Stuff something from the parse tree into the database
> table.
>         """
>         if   symbol_value==5: self.cur_indent += 1
>         elif symbol_value==6: self.cur_indent -= 1
>
>         try:
>             sql = target_insert    \
>                 % ( symbol_value
>                   , self.cur_indent
>                   , re.sub( "'", "`", str(val) ))
>             c.execute( sql  )
>         except AttributeError:
>             print "connection bad in lexer"
>         except sqlite3.OperationalError:
>             print "suckage at indent of %s for %s" \
>                 % (self.cur_indent, sql)
>
> def dump_tup( tup
>             , sym
>             , c
>             , stok ):
>     """Recursive function to descend TUP and analyze its elements.
>          tup       parse tree of a file, rendered as a tuple
>          sym       dictionary rendered from symbol module
>          c         live database cursor
>          stok      output object effect token storage
>     """
>     for node in tup:
>         typ = type( node )
>         r   = getattr( typ
>                      , "__repr__"
>                      , None       )
>
>         if (issubclass(typ, tuple) and r is tuple.__repr__):
>
>             if token.tok_name.has_key( node[0] ):
>                 stok.do_symbol( c
>                               , node[0]
>                               , node[1] )
>             elif sym.recurse_it( node[0] ):
>                 stok.do_symbol( c
>                               , node[0]
>                               , '__py__' ) #If you say node[1] here,
>                                            # the sqlite file is fat
>                                            # and instructive
>                 for node2 in node[1:]:
>                     dump_tup( node2
>                             , sym
>                             , c
>                             , stok )
>             else:
>                 stok.do_symbol( c
>                               , node[0]
>                               , node[1] )
>                 dump_tup(  node[1]
>                          , sym
>                          , c
>                          , stok )
>         else:
>             stok.do_symbol( c
>                           , 0
>                           , node )
>
>
> def convert_python_source_tree_to_table( file_name
>                                        , target_name ):
>     """Retrieve information from the parse tree of a source file.
>     Create an output database file in sqlite.
>     Make a table in there, and then procede to stuff the flattened
> input parse tree into it.
>
>     file_name   Name of the file to read Python source code from.
>     target_name Name for the sqlite database
>     """
>     x    = open( file_name ).readlines()
>     y    = []
>     [y.append( line.replace("\r\n","")   ) for line in x]
>
>     ast  = parser.suite(    "\n".join(y) )
>     conn = sqlite3.connect( target_name  )
>     conn.isolation_level  = None
>     c    = conn.cursor()
>     c.execute(              target_table )
>     c.execute(              symbol_table )
>     sym  = symbol_manager(  c            )
>     stok = stocker()
>
>     #pprint.pprint( ast.totuple() )
>     dump_tup( ast.totuple()
>             , sym
>             , c
>             , stok )
>
> def main():
>     usage  = "usage: %prog [options] arg"
>     parser = OptionParser(usage)
>     parser.add_option("-f", "--file",  dest="filename"
>                      , action="store", type="string"
>                      , help  ="read python source from FILENAME")
>     #TODO: test for existence of output file, eject if exists
>     parser.add_option("-o", "--output",dest="output"
>                      , action="store", type="string"
>                      , help  ="name of sqlite output file")
>     (options, args) = parser.parse_args()
>
>     convert_python_source_tree_to_table(  options.filename
>                                        ,  options.output   )
>
> if __name__ == "__main__":
>     main()
>
> --
> http://mail.python.org/mailman/listinfo/python-list
>

Hello, I took a look at that script and I have made some changes.
Before posting my version, let me comment on a few things.

First, you probably noticed that it gets slow as you run the script
with "larger" files. All the time "wasted" on this is because you set
isolation level to None, so there are a lot of commits and commits
make this slow. Removing that isolation level and doing only one
commit after dump_tup cuts basically all time wasted;

Second, don't use "%s" to insert values into your sql query string
please. For sqlite you should substitute those by "?"s and pass a
tuple to it;

Third, don't use "yourdict.has_key(key)", use "key in yourdict". I
have read the Disclaimer at the top, but maybe you wanted to hear
something;

Fourth, It could be the email client but did you use 3 spaces for indent ? :/ ;

Fifth, other observations are left to the reader as exercise

My version:
#!/usr/bin/env python

"""Script to dump the parse tree of an input file to a SQLite
database.
"""

import token
import parser
import symbol
import sqlite3
from optparse import OptionParser

# Schema of the output table that receives the flattened parse tree:
# an auto-incrementing row id plus the symbol/token id, the indent depth
# and the textual value recorded for each row.
TARGET_TABLE  = """CREATE TABLE tbl_parse_tree (
        parse_tree_id    INTEGER PRIMARY KEY AUTOINCREMENT,
        parse_tree_symbol_id,
        parse_tree_indent,
        parse_tree_value)"""

# Parameterized insert for one parse-tree row; the three "?" placeholders
# are bound from a tuple passed to cursor.execute().
TARGET_INSERT = """INSERT INTO tbl_parse_tree
        (parse_tree_symbol_id, parse_tree_indent, parse_tree_value)
        VALUES (?, ?, ?)"""

# Schema of the lookup table mapping a numeric symbol/token id to its name.
# NOTE(review): the "tlp_" prefix differs from "tbl_" above; it is carried
# over from the original script -- confirm it is intentional.
SYMBOL_TABLE  = """CREATE TABLE tlp_parse_tree_symbol (
        parse_tree_symbol_id    INTEGER PRIMARY KEY,
        parse_tree_symbol_val)"""

# Parameterized insert for one (id, name) pair of the lookup table.
SYMBOL_INSERT = """INSERT INTO tlp_parse_tree_symbol
        (parse_tree_symbol_id, parse_tree_symbol_val) VALUES (?, ?)"""

class SymbolManager(object):
    """Merge the grammar symbol and token name tables into one lookup.

    The merged mapping (numeric id -> name) is kept in ``to_merge`` and
    is also written to the symbol lookup table when the object is built.
    """

    def __init__(self, c):
        """Build the merged table and insert every entry through *c*.

        c   live database cursor; one SYMBOL_INSERT is executed per
            (id, name) pair.
        """
        # Start from the token names, then overlay the grammar symbols,
        # so a symbol name wins if an id appears in both tables.
        self.to_merge = token.tok_name.copy()
        self.to_merge.update(symbol.sym_name)

        # items() (rather than iteritems()) runs on Python 2 and 3 alike.
        for k, v in self.to_merge.items():
            c.execute(SYMBOL_INSERT, (k, v))

    def get_symbol(self, key):
        """Return the name registered for *key*, or -1 when it is unknown."""
        return self.to_merge.get(key, -1)

    def recurse_it(self, tester):
        """Check to see if dump_tup should recurse.

        Returns True exactly when *tester* is a known symbol/token id.
        The previous form compared the looked-up name (a string) with 0,
        which only worked through Python 2's ordering of unrelated types
        and raises TypeError on Python 3; a membership test gives the
        same answer without that dependency.
        """
        return tester in self.to_merge


class Stocker(object):
    """Remembers the depth of the tree and effects the INSERTs
    into the output file.
    """

    def __init__(self):
        # Current nesting depth; adjusted by INDENT/DEDENT tokens.
        self.cur_indent = 0

    def do_symbol(self, c, symbol_value, val=""):
        """Stuff something from the parse tree into the database table.

        c             live database cursor used for the INSERT
        symbol_value  numeric symbol/token id of the row
        val           value stored with the row; converted with str()

        The depth is adjusted *before* the row is written, so an INDENT
        row already carries the deeper level and a DEDENT row the
        shallower one.
        """
        # Named token constants instead of the bare numbers 5 and 6.
        if symbol_value == token.INDENT:
            self.cur_indent += 1
        elif symbol_value == token.DEDENT:
            self.cur_indent -= 1

        # Single quotes are still rewritten to backquotes, as before, so
        # stored values keep the same form even though the parameterized
        # query no longer needs it for quoting.
        c.execute(TARGET_INSERT, (symbol_value, self.cur_indent,
                                  str(val).replace("'", "`")))


def dump_tup(tup, sym, c, stok):
    """Walk TUP recursively and record each element through STOK.

        tup       parse tree of a file, rendered as a tuple
        sym       object whose recurse_it(id) decides whether to descend
        c         live database cursor, handed on to stok.do_symbol
        stok      output object that stores each row via do_symbol

    Elements that are not plain tuples (or are tuple subclasses with
    their own __repr__) are stored as leaves under symbol id 0.
    """
    for item in tup:
        item_type = type(item)
        plain_tuple = (
            issubclass(item_type, tuple)
            and getattr(item_type, "__repr__", None) is tuple.__repr__
        )

        if not plain_tuple:
            # Leaf value: record it with the placeholder id 0.
            stok.do_symbol(c, 0, item)
            continue

        head = item[0]

        if head in token.tok_name:
            # Token node: store its id together with its text.
            stok.do_symbol(c, head, item[1])
            continue

        if sym.recurse_it(head):
            # If you say item[1] here, the sqlite file is fat
            # and instructive
            stok.do_symbol(c, head, '__py__')
            for child in item[1:]:
                dump_tup(child, sym, c, stok)
            continue

        # Unknown id: store the second element, then descend into it.
        stok.do_symbol(c, head, item[1])
        dump_tup(item[1], sym, c, stok)

def python_source_tree_to_db(file_name, target_name):
    """Retrieve information from the parse tree of a source file.
    Create an output database file in sqlite.
    Make a table in there, and then proceed to stuff the flattened
    input parse tree into it.

    file_name   Name of the file to read Python source code from.
    target_name Name for the sqlite database

    Errors from reading or parsing the source, or from sqlite, propagate
    to the caller.
    """
    # Read and parse before touching the database, so a missing or
    # syntactically invalid source file does not leave a half-built
    # output file behind.
    source_file = open(file_name, 'rU')
    try:
        source = source_file.read()
    finally:
        source_file.close()
    ast = parser.suite(source)

    conn = sqlite3.connect(target_name)
    try:
        c = conn.cursor()
        c.execute(TARGET_TABLE)
        c.execute(SYMBOL_TABLE)

        sym = SymbolManager(c)
        stok = Stocker()
        dump_tup(ast.totuple(), sym, c, stok)

        # One commit for the whole run; committing per row is what made
        # the earlier version slow.
        conn.commit()
    finally:
        # Always release the database file, even when an insert failed.
        conn.close()

def main():
    """Parse the command line and run the conversion.

    Prints the option help when either -f/--file or -o/--output is
    missing, and refuses to run (printing a message) when the output
    path already exists. Otherwise hands both names to
    python_source_tree_to_db.
    """
    # Local import keeps this entry point self-contained.
    import os

    oparser = OptionParser("usage: %prog [options] arg")
    oparser.add_option("-f", "--file", dest="filename",
                      help="read python source from FILENAME")
    oparser.add_option("-o", "--output", dest="output",
                      help="name of sqlite output file")
    (options, _) = oparser.parse_args()

    if not options.filename or not options.output:
        oparser.print_help()
        return

    # os.path.exists instead of probing with open(): no file handle is
    # leaked, and an unreadable file or a directory at that path is
    # correctly treated as "already exists".
    if os.path.exists(options.output):
        # Parenthesized single argument prints identically on Python 2 and 3.
        print("Output file exists, chose another one.")
        return

    python_source_tree_to_db(options.filename, options.output)


# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()

-- 
-- Guilherme H. Polo Goncalves



More information about the Python-list mailing list