.py to sqlite translator [1 of 2]

smitty1e smitty1e at gmail.com
Fri Oct 26 00:19:19 EDT 2007


Disclaimer(s): the author is nobody's pythonista.  This could probably
be done more elegantly.
The driver for the effort is to get PyMacs to work with new-style
classes.
This rendering stage stands alone, and might be used for other
purposes.
A subsequent post will show how to use the resulting file to produce
(I think valid) .el trampoline
signatures for PyMacs.
If nothing else, it shows some python internals in an interesting way.
Tested against version 2.5.1
Maybe "lumberjack.py" would be a better name, since "It cuts down
trees, goes real slow, and uses disk galore.  Wishes it'd been
webfoot[1], just like its dear author".
Cheers,
Chris

[1] Author was born in Oregon.

#A sample file:
# Minimal new-style class (it derives from object) used as the demonstration
# input for the renderer script below.
class sample( object ):
    """fairly trivial sample class for demonstration purposes.
    """
    def __init__( self
                , some_string ):
        # Remember the string so that show() can print it later.
        self.hold_it = some_string

    def show( self ):
        # Python 2 print statement: writes the stored string to stdout.
        print self.hold_it

#Invocation:
# ./pysqlrender.py -f sample.py -o output

#Script:
#!/usr/bin/python

"""Script to dump the parse tree of an input file to a SQLite
database.
"""

from   optparse import OptionParser
import os
import parser
import pprint
import re
import sqlite3
import symbol
import token
import types

from   types import ListType \
                  , TupleType

# DDL for the main output table.  parse_tree_id is the only column with a
# declared type; the symbol id, indent and value columns are left untyped.
# (The AUTOINCREMENT keyword sits on its own line inside the literal; SQL
# treats the line break as ordinary whitespace.)
target_table  = """CREATE TABLE tbl_parse_tree        (
                     parse_tree_id     INTEGER PRIMARY KEY
AUTOINCREMENT
                   , parse_tree_symbol_id
                   , parse_tree_indent
                   , parse_tree_value  );"""

# INSERT template for tbl_parse_tree.  The three %s slots are filled with
# Python string formatting (symbol id, indent, value) before execution; the
# value slot is wrapped in single quotes, so the text substituted there must
# not itself contain a single quote.
target_insert = """INSERT INTO tbl_parse_tree        (
                     parse_tree_symbol_id
                   , parse_tree_indent
                   , parse_tree_value )
                   VALUES         (%s,  %s, '%s' );"""

# DDL for the symbol lookup table: numeric id (primary key) -> name.
# NOTE(review): the "tlp_" prefix differs from "tbl_" above; it is used
# consistently in both statements below, so it is left as-is - confirm
# whether it is intentional.
symbol_table  = """CREATE TABLE tlp_parse_tree_symbol (
                     parse_tree_symbol_id INTEGER PRIMARY KEY
                   , parse_tree_symbol_val   );"""
# INSERT template for the symbol lookup table, filled with Python string
# formatting (numeric id, name) before execution.
symbol_insert = """INSERT INTO tlp_parse_tree_symbol (
                     parse_tree_symbol_id
                   , parse_tree_symbol_val )
                   VALUES ( %s, '%s' );"""

class symbol_manager( object ):
    """Merge grammar symbols and lexer tokens into one lookup.

    Constructing an instance writes every ``number -> name`` pair from
    ``symbol.sym_name`` and ``token.tok_name`` into the symbol lookup
    table.  The instance keeps no state of its own; the query methods
    read the two standard-library dictionaries directly.
    """
    def __init__( self
                , c    ):
        """Populate the symbol lookup table.

        c    live database cursor; the lookup table must already exist.
        """
        # Grammar symbols first, then tokens, so that a grammar symbol
        # wins whenever both dictionaries use the same number.
        for names in ( symbol.sym_name, token.tok_name ):
            for k, name in names.items():
                sql = symbol_insert % ( k, name )
                try:
                    c.execute( sql )
                except sqlite3.IntegrityError:
                    # Deliberate best effort: the id column is the
                    # primary key, so a number that is already present
                    # (e.g. token.NT_OFFSET shares its value with the
                    # first grammar symbol) is simply skipped.
                    pass

    def get_symbol( self
                  , key  ):
        """Return the name registered for KEY, or -1 if KEY is unknown.

        Grammar symbols are consulted before tokens.
        """
        if key in symbol.sym_name:
            return symbol.sym_name[key]
        if key in token.tok_name:
            return token.tok_name[key]
        return -1

    def recurse_it( self, tester ):
        """Check to see if dump_tup should recurse.

        Returns True when TESTER is a known grammar symbol or token
        number, False otherwise.
        """
        # get_symbol() yields a name (a string) or the -1 sentinel.
        # Compare against the sentinel explicitly instead of relying on
        # "string > 0", which only works through Python 2's arbitrary
        # cross-type ordering and raises TypeError on Python 3.
        return self.get_symbol( tester ) != -1

class stocker( object ):
    """Remembers the depth of the tree and effects the INSERTs
       into the output file.

       cur_indent counts INDENT tokens seen minus DEDENT tokens seen so
       far, and is stored with every row.
    """
    def __init__( self ):
        self.cur_indent = 0

    def do_symbol( self
                 , c
                 , symbol_value
                 , val  = "" ):
        """Stuff something from the parse tree into the database table.

        c             live database cursor
        symbol_value  numeric symbol / token id of the element
        val           value to store; converted with str()

        An INDENT token raises the remembered depth and a DEDENT token
        lowers it *before* the row is written, so the row for the token
        itself already carries the new depth.

        Database problems are reported on stdout and otherwise ignored,
        so one bad row does not abort the whole dump.
        """
        if   symbol_value == token.INDENT: self.cur_indent += 1
        elif symbol_value == token.DEDENT: self.cur_indent -= 1

        # The statement is assembled as text and the value is wrapped in
        # single quotes by the template, so any single quote inside the
        # value is swapped for a backtick to keep the statement valid.
        # This is lossy but is what readers of the table currently get.
        sql = target_insert    \
            % ( symbol_value
              , self.cur_indent
              , str( val ).replace( "'", "`" ))
        try:
            c.execute( sql )
        except AttributeError:
            # c has no execute() - e.g. None was passed as the cursor.
            print( "connection bad in lexer" )
        except sqlite3.OperationalError:
            print( "suckage at indent of %s for %s"
                 % (self.cur_indent, sql) )

def dump_tup( tup
            , sym
            , c
            , stok ):
    """Recursive function to descend TUP and store its elements.

         tup       parse tree (or sub-tree) of a file, rendered as
                   nested tuples
         sym       object offering recurse_it( number ), e.g. a
                   symbol_manager
         c         live database cursor, handed through to STOK
         stok      output object that effects token storage through
                   do_symbol( c, number, value ), e.g. a stocker

    Every element of TUP is handled in order:
      * an element that is not a plain tuple is stored under symbol
        id 0 with the element itself as the value;
      * a tuple whose first item is a token number is stored as
        ( token number, second item );
      * a tuple whose first item SYM wants recursed into is stored
        with the placeholder value '__py__', then each remaining item
        is walked recursively;
      * any other tuple is stored with its second item as the value,
        and that second item is then walked recursively.
    """
    for node in tup:
        typ = type( node )
        r   = getattr( typ
                     , "__repr__"
                     , None       )

        # Only genuine tuples count as tree nodes: a tuple subclass that
        # overrides __repr__ is treated like a leaf value.
        if (issubclass(typ, tuple) and r is tuple.__repr__):

            # "in" replaces dict.has_key(), which Python 3 removed.
            if node[0] in token.tok_name:
                stok.do_symbol( c
                              , node[0]
                              , node[1] )
            elif sym.recurse_it( node[0] ):
                stok.do_symbol( c
                              , node[0]
                              , '__py__' ) #If you say node[1] here,
                                           # the sqlite file is fat
                                           # and instructive
                # NOTE(review): each child is passed back in as a whole
                # sequence, so the child's own leading number is visited
                # as a bare element and stored under symbol id 0.
                # Confirm that consumers of the table rely on this
                # layout before changing the traversal.
                for node2 in node[1:]:
                    dump_tup( node2
                            , sym
                            , c
                            , stok )
            else:
                stok.do_symbol( c
                              , node[0]
                              , node[1] )
                dump_tup(  node[1]
                         , sym
                         , c
                         , stok )
        else:
            stok.do_symbol( c
                          , 0
                          , node )


def convert_python_source_tree_to_table( file_name
                                       , target_name ):
    """Retrieve information from the parse tree of a source file.
    Create an output database file in sqlite.
    Make the tables in there, and then proceed to stuff the flattened
    input parse tree into them.

    file_name   Name of the file to read Python source code from.
    target_name Name for the sqlite database

    Returns None.  Errors from opening the source file, from parsing
    it, or from creating the tables (for instance because they already
    exist in TARGET_NAME) propagate to the caller.
    """
    # try/finally rather than "with" keeps this runnable on Python 2.5.
    source_file = open( file_name )
    try:
        source = source_file.read()
    finally:
        source_file.close()

    # Normalise Windows line endings and make sure the text ends with a
    # newline.  Every line ends up with exactly one "\n"; stripping only
    # "\r\n" and re-joining with "\n" doubled the newline of plain LF
    # files, which breaks backslash continuations and changes
    # triple-quoted strings.
    source = source.replace( "\r\n", "\n" )
    if not source.endswith( "\n" ):
        source += "\n"

    tree = parser.suite( source )
    conn = sqlite3.connect( target_name )
    try:
        # Autocommit: every statement is written as soon as it runs.
        # NOTE(review): this means one transaction per INSERT; a single
        # enclosing transaction would probably be much faster.
        conn.isolation_level  = None
        c    = conn.cursor()
        c.execute(              target_table )
        c.execute(              symbol_table )
        sym  = symbol_manager(  c            )
        stok = stocker()

        dump_tup( tree.totuple()
                , sym
                , c
                , stok )
    finally:
        conn.close()

def main( argv = None ):
    """Command-line entry point: parse the options and run the conversion.

    argv   optional list of command-line arguments, without the program
           name.  None (the default) makes optparse read sys.argv[1:].

    Exits with status 2 and a usage message when --file or --output is
    missing, or when the output file already exists.
    """
    usage      = "usage: %prog -f FILENAME -o OUTPUT"
    # Not called "parser", so the imported parser module stays visible.
    opt_parser = OptionParser(usage)
    opt_parser.add_option("-f", "--file",  dest="filename"
                         , action="store", type="string"
                         , help  ="read python source from FILENAME")
    opt_parser.add_option("-o", "--output",dest="output"
                         , action="store", type="string"
                         , help  ="name of sqlite output file")
    (options, args) = opt_parser.parse_args( argv )

    if not options.filename or not options.output:
        opt_parser.error( "both --file and --output are required" )
    # Refuse to reuse an existing file: the conversion creates its
    # tables unconditionally, which fails on a database that already
    # has them.
    if os.path.exists( options.output ):
        opt_parser.error( "output file %s already exists"
                        % options.output )

    convert_python_source_tree_to_table(  options.filename
                                       ,  options.output   )

if __name__ == "__main__":
    main()




More information about the Python-list mailing list