[Python-checkins] CVS: python/nondist/sandbox/parrot README,NONE,1.1 euclid.py,NONE,1.1 parrot-gen.py,NONE,1.1

A.M. Kuchling akuchling@users.sourceforge.net
Wed, 26 Sep 2001 13:37:45 -0700


Update of /cvsroot/python/python/nondist/sandbox/parrot
In directory usw-pr-cvs1:/tmp/cvs-serv22033

Added Files:
	README euclid.py parrot-gen.py 
Log Message:
Initial checkin of Python->Parrot compiler


--- NEW FILE: README ---

This is a simple compiler that turns Python source into Parrot
bytecode.  By default, it will generate both a .pasm file (containing
Parrot assembly source) and a compiled .pdbc file.  -S will make it
only write the .pasm file, and -r will try to execute the code by
running Parrot's test_prog on the .pdbc file.

Limitations:

* Currently this only understands a *really* limited subset of Python: 
  simple expressions, integer variables, and the 'while' and 'if'
  statements (with no 'else' suites).

* The only allowable type is integer; strings, floats, long ints, &c.,
  aren't supported at all.
 
* It will die with an assertion if you feed it a language construct it
  doesn't handle (def, class, most operators).
 
* The code structure is suboptimal; this is just a quick-and-dirty hack.
 
Example usage:
 
ute parrot>python euclid.py     # Run code with Python
96 64
32
ute parrot>python parrot-gen.py -r euclid.py  # -r to run generated bytecode
96 64
32
ute parrot>cat euclid.pasm
main:
        set I3, 96
        set I10, 64
        set I9, 0
        add I0, I3, I9
   ... <rest deleted; you get the idea> ...
 
Currently the Parrot interpreter only supports integer, floating
point, and string registers.  There's no way to store the contents of
a register in memory as far as I can tell, and PMCs -- Parrot Magic
Cookies, the polymorphic objects that would correspond to PyObjects --
aren't implemented either.  This means it's not possible to handle
general variables, and therefore we'll have to wait for PMCs before
general Python programs can be handled.  Scalar PMCs are supposed to
be implemented for Parrot 0.0.3, and dictionary/list PMCs in 0.0.4.

You can discuss this code on either python-dev@python.org or
perl6-internals@perl.org.

--amk
--- NEW FILE: euclid.py ---

# Python implementation of Euclid's algorithm

m = 96
n = 64
print m,n

r = m % n
while r != 0:
    m = n
    n = r
    r = m % n

print n

--- NEW FILE: parrot-gen.py ---
"""parrot-gen.py

Parses a Python file and outputs a Parrot assembly file.

Currently this only understands a *really* limited subset of Python.
The only allowable type is integer; strings, floats, long ints, &c.,
aren't supported at all.  It will die with an assertion if you feed it
a language construct it doesn't handle (def, class, most operators).
"""

# created 2001/09/21, AMK

__revision__ = "$Id: parrot-gen.py,v 1.1 2001/09/26 20:37:43 akuchling Exp $"

import sys, os, types
import getopt, random

__doc__ = """%s: convert Python source to Parrot bytecode
  -r        Run Parrot assembler and interpreter on output
  -S        Only produce assembly source code (.pasm file)
"""

from compiler import ast, transformer, visitor, walk
from compiler.pycodegen import LocalNameFinder

# List 31 of the 32 integer registers as being available
FREE_REGISTERS = []
for i in range(1, 32):
    FREE_REGISTERS.append('I' + str(i))

# Global variable: maps local variable names to their assigned register
LOCAL_REGS = {}

symcount = 1                            # Counter for symbols
def gensym ():
    """gensym() -> string
    Return a unique string that can be used as a label in the generated
    assembly code.
    """
    global symcount
    sym = 'sym%i' % symcount
    symcount += 1
    return sym
    

class ParrotVisitor:
    """Visitor class that outputs Parrot bytecode.

    Attributes:
      lines : list of strings, each one a line of output.
    """
    
    def __init__ (self):
        self.lines = []
        self.symcount = 0

    def add_line (self, line):
        """add_line(line:string)
        Add a line of output to the generated code.  The line is
        modified slightly to ensure that it ends with a newline
        and is indented properly.
        """
        
        line = line.rstrip() + '\n'

        # Indent mnemonics
        # XXX this logic is obviously far too simple, but it'll do for now
        if ':' not in line:
            line = '\t' + line
        self.lines.append(line)

    def add_lines (self, lines):
        """add_lines(line:[string])
        Add several lines of output to the generated code.
        """
        for line in lines:
            self.add_line(line)

    # Visitor methods.  Most of the
    def visitAssign (self, node):
        a_name = node.nodes[0].name
        reg = LOCAL_REGS[a_name]
        lines = compile_expr(node.expr, reg, FREE_REGISTERS)
        self.add_lines(lines)
        # XXX assign result to 'a_name.name'

    def visitClass (self, node):
        assert 0, "class statement not supported"
    
    def visitIf (self, node):
        assert len(node.tests) == 1, "Only one test supported"
        test_expr, body = node.tests[0]
        sym = gensym()
        sym2 = gensym()

        lines = (compile_expr(test_expr, 'I0', FREE_REGISTERS) +
                 ["if I0, %s" % sym,
                  "branch %s" % sym2,
                  "%s:" % sym])
        self.add_lines(lines)
        self.visit(body)
        self.add_line("%s:" % sym2)
        
    def visitPrint (self, node):
        assert node.dest is None, "print without trailing newline not yet handled"

    def visitPrintnl (self, node):
        assert node.dest is None, "print>> not yet handled"
        lines = []
        for n in node.nodes:
            lines += compile_expr(n, 'I0', FREE_REGISTERS)
            lines += ["print I0", 'print " "']
        lines += ['print "\\n"']
        self.add_lines(lines)

##     def visitAnd(self, node):
##         assert 0, "not implemented"
##     def visitAssAttr(self, node):
##         assert 0, "not implemented"
##     def visitAssList(self, node):
##         assert 0, "not implemented"
##     def visitAssName(self, node):
##         assert 0, "not implemented"
##     def visitAssName(self, node):
##         assert 0, "not implemented"
##     def visitAssName(self, node):
##         assert 0, "not implemented"
##     def visitAssTuple(self, node):
##         assert 0, "not implemented"
    def visitAssert(self, node):
        assert 0, "not implemented"
##     def visitAugAssign(self, node):
##         assert 0, "not implemented"
##     def visitAugGetattr(self, node, mode):
##         assert 0, "not implemented"
##     def visitAugName(self, node, mode):
##         assert 0, "not implemented"
##     def visitAugSlice(self, node, mode):
##         assert 0, "not implemented"
##     def visitAugSubscript(self, node, mode):
##         assert 0, "not implemented"
##     def visitBackquote(self, node):
##         assert 0, "not implemented"
##     def visitBitand(self, node):
##         assert 0, "not implemented"
##     def visitBitor(self, node):
##         assert 0, "not implemented"
##     def visitBitxor(self, node):
##         assert 0, "not implemented"
    def visitBreak(self, node):
        assert 0, "not implemented"
    def visitCallFunc(self, node):
        assert 0, "not implemented"
    def visitClass(self, node):
        assert 0, "not implemented"
##     def visitCompare(self, node):
##         assert 0, "not implemented"
##     def visitConst(self, node):
##         assert 0, "not implemented"
##     def visitContinue(self, node):
##         assert 0, "not implemented"
##     def visitDict(self, node):
##         assert 0, "not implemented"
##     def visitDict(self, node):
##         assert 0, "not implemented"
    def visitDiscard(self, node):
        lines = compile_expr(node.expr, 'I0', FREE_REGISTERS)
        self.add_lines(lines)
##     def visitEllipsis(self, node):
##         assert 0, "not implemented"
    def visitExec(self, node):
        assert 0, "not implemented"
    def visitFor(self, node):
        assert 0, "not implemented"
##     def visitFrom(self, node):
##         assert 0, "not implemented"
##     def visitFrom(self, node):
##         assert 0, "not implemented"
##     def visitFunction(self, node):
##         assert 0, "not implemented"
##     def visitFunction(self, node):
##         assert 0, "not implemented"
##     def visitGetattr(self, node):
##         assert 0, "not implemented"
##     def visitGlobal(self, node):
##         assert 0, "not implemented"
##     def visitGlobal(self, node):
##         assert 0, "not implemented"
##     def visitImport(self, node):
##         assert 0, "not implemented"
    def visitImport(self, node):
        assert 0, "not implemented"
##     def visitInvert(self, node):
##         assert 0, "not implemented"
##     def visitKeyword(self, node):
##         assert 0, "not implemented"
##     def visitLambda(self, node):
##         assert 0, "not implemented"
##     def visitLambda(self, node):
##         assert 0, "not implemented"
##     def visitLeftShift(self, node):
##         assert 0, "not implemented"
##     def visitList(self, node):
##         assert 0, "not implemented"
##     def visitListComp(self, node):
##         assert 0, "not implemented"
##     def visitListCompFor(self, node):
##         assert 0, "not implemented"
##     def visitListCompIf(self, node, branch):
##         assert 0, "not implemented"
##     def visitModule(self, node):
##         pass
##         #assert 0, "not implemented"
##     def visitMul(self, node):
##         assert 0, "not implemented"
##     def visitName(self, node):
##         assert 0, "not implemented"
##     def visitNot(self, node):
##         assert 0, "not implemented"
##     def visitOr(self, node):
##         assert 0, "not implemented"
##     def visitPass(self, node):
##         assert 0, "not implemented"
##     def visitPower(self, node):
##         assert 0, "not implemented"
##     def visitRaise(self, node):
##         assert 0, "not implemented"
##     def visitReturn(self, node):
##         assert 0, "not implemented"
##     def visitRightShift(self, node):
##         assert 0, "not implemented"
##     def visitSlice(self, node, aug_flag=None):
##         assert 0, "not implemented"
##     def visitSliceobj(self, node):
##         assert 0, "not implemented"
##     def visitSub(self, node):
##         assert 0, "not implemented"
##     def visitSubscript(self, node, aug_flag=None):
##         assert 0, "not implemented"
##     def visitTest(self, node, jump):
##         assert 0, "not implemented"
##     def visitTryExcept(self, node):
##         assert 0, "not implemented"
##     def visitTryFinally(self, node):
##         assert 0, "not implemented"
##     def visitTuple(self, node):
##         assert 0, "not implemented"
##     def visitUnaryAdd(self, node):
##         assert 0, "not implemented"
##     def visitUnaryInvert(self, node):
##         assert 0, "not implemented"
##     def visitUnarySub(self, node):
##         assert 0, "not implemented"

    def visitWhile(self, node):
        assert node.else_ is None, "while...else not supported"
        start = gensym()
        end = gensym()

        self.add_line("%s:" % start)
        self.add_lines(compile_expr(node.test, 'I0', FREE_REGISTERS))
        self.add_line('EQ I0, 0, %s' % end)
        self.visit(node.body)
        self.add_line('BRANCH %s' % start)
        self.add_line("%s:" % end)

def compile_expr (expr, dest_register, avail_registers):
    """compile_expr(expr:Node, dest_register:string,
                    avail_registers:[string]) -> [string]

    Generate bytecode to evaluate the resulting expression.  The
    result is left in the register named by 'dest_register';
    'avail_registers' is a list of unused registers.
    
    
    XXX This handles registers really stupidly, and always leaves its
    result in I0; it should allocate registers more efficiently.
    XXX how to handle string constants, or constants of other types?
    """
    # Copy avail_registers, because we'll be mutating it later
    avail_registers = avail_registers[:]
    lines = []

    # Ensure destination register isn't listed among available regs
    if dest_register in avail_registers:
        avail_registers.remove(dest_register)
    
    if isinstance(expr, ast.Const):
        assert type(expr.value) == types.IntType, "Only int type is supported"
        return ["set %s, %r" % (dest_register, expr.value)]

    elif isinstance(expr, ast.Name):
        reg = LOCAL_REGS[expr.name]
        temp = random.choice(FREE_REGISTERS)
        return ["set %s, 0" % temp,
                "add %s, %s, %s" % (dest_register, reg, temp)]
    
    elif (isinstance(expr, ast.Add) or
          isinstance(expr, ast.Sub) or
          isinstance(expr, ast.Mul) or
          isinstance(expr, ast.Div) or
          isinstance(expr, ast.Mod)
          ):
        dict = {ast.Add: "add", ast.Sub: "sub",
                ast.Mul: "mul", ast.Div: "div",
                ast.Mod: "mod"}
        opcode = dict[expr.__class__]
        temp1 = random.choice(avail_registers)
        avail_registers.remove(temp1)
        temp2 = random.choice(avail_registers)
        avail_registers.remove(temp2)
        lines = (compile_expr(expr.left, temp1, avail_registers) +
                 compile_expr(expr.right, temp2, avail_registers) +
                 # perform operation
                 ["%s %s, %s, %s" % (opcode, dest_register,
                                     temp1, temp2)]
                )

    elif isinstance(expr, ast.Compare):
        dict = {'<':'lt', '>':'gt', '==':'eq',
                '>=':'ge', '=>':'ge',
                '<=':'le', '=>':'le',
                '!=':'ne', '<>':'ne'}
        temp1 = random.choice(avail_registers)
        avail_registers.remove(temp1)
        temp2 = random.choice(avail_registers)
        avail_registers.remove(temp2)

        # XXX this code generation is doubtless wrong when there
        # are multiple comparison operators (as in 1<2<3).
        lines = compile_expr(expr.expr, temp1, avail_registers)
        for op, expr2 in expr.ops:
            lines += compile_expr(expr2, temp2, avail_registers)
            sym = 'true' + gensym()
            sym2 = 'done' + gensym()
            lines += ["%s %s, %s, %s" % (dict[op], temp1, temp2, sym),
                      "set %s, 0" % dest_register,      # False branch
                      "eq %s, %s, %s" % (dest_register, dest_register, sym2),
                      "%s:" % sym,
                      "set %s, 1" % dest_register,      # True branch
                      "%s:" % sym2]

    else:
        raise RuntimeError, "Unexpected node in expression: %r" % expr
    
    return lines



def generate_assembly (input_name, output_name):
    ast = transformer.parseFile(input_name)
##    print ast
    
    # Determine locals and assign them to registers 
    lnf = walk(ast, LocalNameFinder(), 0)
    for name in lnf.getLocals().elements():
        reg = LOCAL_REGS[name] = random.choice(FREE_REGISTERS)
        FREE_REGISTERS.remove(reg)
##    print LOCAL_REGS
    
    # Walk tree and generate bytecode
    vtor = visitor.ASTVisitor()
    pv = ParrotVisitor()
    vtor.preorder(ast, pv)
    
    # Write the generated assembly code
    lines = ["main:\n"] + pv.lines
    output = open(output_name, 'w')
    output.writelines(lines)
    output.close()

    
def main():
    opts, args = getopt.getopt(sys.argv[1:], 'hrS', ['help'])

    do_run = 0
    do_assemble = 1
    for opt, param in opts:
        if opt in ['-h', '--help']:
            print __doc__ % sys.argv[0]
            sys.exit(0)
        elif opt == '-r':
            do_run = 1
        elif opt == '-S':
            do_assemble = 0
            
    if len(args) != 1:
        print __doc__ % sys.argv[0]
        sys.exit(0)
        
    for filename in args:
        root, ext = os.path.splitext(filename)
        asm_filename = root + '.pasm'
        bytecode_filename = root + '.pdbc'
        generate_assembly(filename, asm_filename)
        if do_assemble:
            err = os.system('perl assemble.pl %s > %s'
                            % (asm_filename, bytecode_filename) )
            if err: sys.exit(err)
        if do_run:
            err = os.system('./test_prog %s' % bytecode_filename)
            if err: sys.exit(err)
    
if __name__ == '__main__':
    main()