python-to-c translator

Will Ware wware at world.std.com
Sun Oct 29 14:25:42 EST 2000


A week or two ago there was some discussion of the possibility of
something that translates a limited subset of Python directly into
C code. One of the big obstacles to this is Python's dynamic typing,
so the avoidance of dynamic typing has to be one of the enabling
assumptions. Anyway, here is an extremely crude ints-only rough
cut at a translator, which translates some functions, produces a
shared object library, and then imports it to do regression testing.


# Let's try to write stuff that translates a subset of Python to C.
# No dynamic typing allowed, and plenty of stuff not supported.

# For starters, let's assume everything is an int.

import dis, sys, os

class Accelerator:
    def __init__(self, modulename):
        self.modulename = modulename
        self.funcs = [ ]
        self.c_codes = [ ]
    def add(self, function):
        retval = ""
        self.funcs.append(function)
        func_code = function.func_code
        code = map(ord, func_code.co_code)
        consts = func_code.co_consts
        stacksize = func_code.co_stacksize
        varnames = func_code.co_varnames
        retval = (retval + """static PyObject * c_%s(PyObject *self, PyObject *args) {
int __stack[%d]; int *__sp = &__stack[0]; struct freelistlink *__freelist=NULL;
""" % (function.__name__, stacksize))
        preamble = ''
        for i in range(len(varnames)):
            preamble = preamble + 'int %s; ' % varnames[i]
        retval = (retval + preamble[:-1] + '\n' +
                  "if (!PyArg_ParseTuple(args, \"" +
                  func_code.co_argcount * "i" + "\"")
        for i in range(func_code.co_argcount):
            retval = retval + ", &" + varnames[i]
        retval = retval + ")) return NULL;\n"
        i = 0
        while i < len(code):
            x = code[i]
            i = i + 1
            y = dis.opname[x]
            retval = retval + "/*%-15s*/ Label_%03d:  " % (y, i-1)
            if x >= 90:
                oparg = code[i] + (code[i+1] << 8)
                i = i + 2
            if y == 'SET_LINENO':
                retval = retval + "/* Python line number %d */" % oparg
            elif y == 'LOAD_FAST':
                retval = retval + "*__sp++ = %s;" % varnames[oparg]
            elif y == 'STORE_FAST':
                retval = retval + "%s = *--__sp;" % varnames[oparg]
            elif y == 'LOAD_CONST':
                retval = retval + "*__sp++ = %s;" % consts[oparg]
            elif y == 'LOAD_GLOBAL':
                name = func_code.co_names[oparg]
                if name == 'range':
                    retval = retval + "*__sp++ = (int)my_range;"
                else:
                    retval = retval + "*__sp++ = (int)c_%s;" % name
            elif y == 'BUILD_LIST':
                retval = retval + "build_a_list(&__sp, %d, &__freelist);" % oparg
            elif y == 'BUILD_TUPLE':
                # No reason to differentiate in C between lists and tuples
                retval = retval + "build_a_list(&__sp, %d, &__freelist);" % oparg
            elif y == 'SETUP_LOOP':
                # discard offset, no action required in C
                pass
            elif y == 'COMPARE_OP':
                retval = retval + "__sp--; "
                if oparg == 0:
                    retval = retval + "__sp[-1] = (__sp[-1] < __sp[0]);"
                elif oparg == 1:
                    retval = retval + "__sp[-1] = (__sp[-1] <= __sp[0]);"
                elif oparg == 2:
                    retval = retval + "__sp[-1] = (__sp[-1] == __sp[0]);"
                elif oparg == 3:
                    retval = retval + "__sp[-1] = (__sp[-1] != __sp[0]);"
                elif oparg == 4:
                    retval = retval + "__sp[-1] = (__sp[-1] > __sp[0]);"
                elif oparg == 5:
                    retval = retval + "__sp[-1] = (__sp[-1] >= __sp[0]);"
                elif oparg == 6:
                    retval = retval + "/* IN?? */"
                elif oparg == 7:
                    retval = retval + "/* NOT_IN?? */"
                elif oparg == 8:
                    retval = retval + "/* IS?? */"
                elif oparg == 9:
                    retval = retval + "/* IS_NOT?? */"
                elif oparg == 10:
                    retval = retval + "/* EXC_MATCH?? */"
                else:
                    retval = retval + "/* BAD?? */"
            elif y == 'JUMP_ABSOLUTE':
                retval = retval + "goto Label_%03d;" % oparg
            elif y == 'JUMP_FORWARD':
                retval = retval + "goto Label_%03d;" % (i + oparg)
            elif y == 'JUMP_IF_FALSE':
                retval = retval + "if (!__sp[-1]) goto Label_%03d;" % oparg
            elif y == 'CALL_FUNCTION':
                na = oparg & 0xff
                # don't bother handling keywords
                retval = retval + "call_a_func(&__sp, %d);" % na
            elif y == 'FOR_LOOP':
                retval = retval + "if (hack_for_loop(&__sp)) goto Label_%03d;" % (oparg + i)
            elif y == 'POP_TOP':
                retval = retval + "__sp--;"
            elif y == 'PRINT_ITEM':
                retval = retval + "printf(\"%d \", *--__sp);"
            elif y == 'PRINT_NEWLINE':
                retval = retval + "printf(\"\\n\");"
            elif y == 'BINARY_SUBSCR':
                retval = retval + \
                         "__sp--; __sp[-1] = ((struct c_list*)__sp[-1])->contents[__sp[0]];"
            elif y == 'BINARY_ADD':
                retval = retval + "__sp--; __sp[-1] = __sp[-1] + __sp[0];"
            elif y == 'BINARY_SUBTRACT':
                retval = retval + "__sp--; __sp[-1] = __sp[-1] - __sp[0];"
            elif y == 'BINARY_MULTIPLY':
                retval = retval + "__sp--; __sp[-1] = __sp[-1] * __sp[0];"
            elif y == 'BINARY_DIVIDE':
                retval = retval + "__sp--; __sp[-1] = __sp[-1] / __sp[0];"
            elif y == 'RETURN_VALUE':
                retval = retval + \
                         "freelists(__freelist); return PyInt_FromLong(__sp[-1]);"
            elif y == 'POP_BLOCK':
                # no action required in C
                pass
            elif y[0] != '<':
                retval = retval + '/* UNIMPLEMENTED: %s */' % y
            else:
                print retval
                raise 'bad opcode', y
            retval = retval + "\n"
        self.c_codes.append(retval + '}\n')
    def make(self, outf=None):
        modulename = self.modulename
        if outf == None:
            outf = open(modulename + '.c', 'w')
        outf.write("""/* This file was automatically generated */
#include "Python.h"
#define None 0
struct c_list { int size; int *contents; };
struct freelistlink { struct c_list *array; struct freelistlink *next; };

/* Python does dynamic allocation with refcounting internally, but here
 * we will just malloc with abandon within a function, then free everybody
 * at the end. So we maintain a list of stuff to be freed within each
 * function, and at return time, free all of them.
 */

static struct c_list *allocatelist(int n, struct freelistlink **funcfreelist)
{
  struct c_list *mylist;
  struct freelistlink *L = (struct freelistlink*) malloc(sizeof(struct freelistlink));
  mylist = (struct c_list*) malloc(sizeof(struct c_list));
  mylist->size = n;
  mylist->contents = (int*) malloc(n * sizeof(int));
  L->next=*funcfreelist;
  L->array=mylist;
  *funcfreelist=L;
  return mylist;
}

static void freelists(struct freelistlink *funcfreelist)
{
  while(funcfreelist)
    {
      struct freelistlink *L;
      L=funcfreelist->next;
      free(funcfreelist->array->contents);
      free(funcfreelist->array);
      free(funcfreelist);
      funcfreelist=L;
    }
}

static void call_a_func(int **sp, int num_args)
{
  int i;
  PyObject *arg;
  arg = PyTuple_New(num_args);
  for (i = 0; i < num_args; i++)
    PyTuple_SetItem(arg, i, PyInt_FromLong((*sp)[i - num_args]));
  (*sp)[-num_args-1] = PyInt_AsLong(((PyCFunction)(*sp)[-num_args-1])(NULL,arg));
  (*sp) -= num_args;
}

/* There ought to be a call_a_c_func() where we don't bother translating
 * back and forth between PyObject*'s and int/int*'s. The functions for
 * which we did call_a_c_func() would have an indicator on the front of
 * their name ala Hungarian notation, maybe 'c_function' or '__function'.
 * They wouldn't have entries in the foo_methods[] array, as you would
 * never want to call them directly from Python, once they've been
 * translated into C.
 */

static void build_a_list(int **sp, int num_entries, struct freelistlink **funcfreelist)
{
  int i;
  struct c_list *mylist = allocatelist(num_entries, funcfreelist);
  for (i = 0; i < num_entries; i++)
    mylist->contents[i] = (*sp)[i - num_entries];
  (*sp)[-num_entries] = (int)mylist;
  (*sp) -= num_entries-1;
}

static int hack_for_loop(int **psp)
{
  int i, j, *sp;
  struct c_list *lst;
  sp = *psp;
  lst = (struct c_list *) sp[-2];
  if (sp[-1] >= lst->size)
    return 1;
  i = sp[-1];
  j = lst->contents[i++];
  sp[-1] = i;
  *sp++ = j;
  *psp = sp;
  return 0;
}

/* Big problem here: I don't see any way to deallocate the lists
 * created by range. They will be a memory leak until I figure it
 * out. Perhaps there'd be some way of passing range a pointer to
 * the caller's freelist.
 *
 * Maybe there should be different protocols for calling functions
 * defined in C rather than Python, which call_a_func() would be
 * smart about.
 */

static PyObject * my_range(PyObject *self, PyObject *args)
{
  int i, n;
  struct c_list *lst;
  struct freelistlink *freelist;
  if (!PyArg_ParseTuple(args, "i", &n)) return NULL;
  lst = (struct c_list*) malloc(sizeof(struct c_list));
  lst->size = n;
  lst->contents = (int*) malloc(n * sizeof(int));
  for (i = 0; i < n; i++)
    lst->contents[i] = i;
  return PyInt_FromLong((int) lst);
}

""")
        for x in self.funcs:
            outf.write("static PyObject * c_%s(PyObject *self, PyObject *args);\n"
                       % x.__name__)
        outf.write("\n")
        for x in self.c_codes:
            outf.write(x)
            outf.write("\n")
        outf.write("static PyMethodDef %s_methods[] = {\n"
                   % self.modulename)
        for x in self.funcs:
            outf.write("  {\"%s\", %s, 1},\n"
                       % (x.__name__, "c_" + x.__name__))
        str = \
"""  {NULL, NULL, 0}
};

DL_EXPORT(void) init%s()
{
  Py_InitModule("%s", %s_methods);
}
"""
        outf.write(str % (self.modulename,
                          self.modulename,
                          self.modulename))
        outf.close()
        os.system('gcc -g -I/usr/include/python1.5 -c -o %s.o %s.c'
                  % (self.modulename, self.modulename))
        os.system('gcc -g -shared -o %s.so %s.o'
                  % (self.modulename, self.modulename))

if __name__ == "__main__":

    os.system('rm -f foo.c foo.o foo.so')

    def counter():
        for i in range(5):
            print i,
        print
    def addtwo(x):
        y = [1, 2, 3]
        return x + y[1]
    def silly(x, y):
        counter()
        return (6 * addtwo(x) + y * 3) / 7

    # dis.dis(counter)

    fast = Accelerator('foo')
    fast.add(counter)
    fast.add(addtwo)
    fast.add(silly)
    fast.make()

    import foo, random
    for i in range(8):
        x = int(40 * random.random())
        y = int(40 * random.random())
        z1 = silly(x, y)
        z2 = foo.silly(x, y)
        if z1 != z2:
            print z1, z2
            raise 'oops'
        print z1, z2

    if len(sys.argv) > 1:
        os.system('shar py2c.py foo.c > py2c.shar')


-- 
# - - - - - - - - - - - - - - - - - - - - - - - -
# Resistance is futile. Capacitance is efficacious.
# Will Ware	email:    wware @ world.std.com



More information about the Python-list mailing list