Pretty Scheme, ??? Python

Neil Cerutti horpner at yahoo.com
Mon Jul 2 13:18:27 EDT 2007


The following simple language parser and interpreter is from
chapter 2 of the textbook: _Programming Languages: Application
and Interpretation_, by Shriram Krishnamurthi.

http://www.cs.brown.edu/~sk/Publications/Books/ProgLangs/

First, I'll show the Scheme version of the program, which uses
the cool define-type and type-case macros (I'd never heard of
them before reading this book, which is too bad). I think it is
beautiful.

; Our grammar:
; <WAE> ::= 
;   <num>
;   | { + <WAE> <WAE> }
;   | { - <WAE> <WAE> }
;   | {with {<id> <WAE>} <WAE>}
;   | <id>
(define-type WAE
  [num (n number?)]
  [add (lhs WAE?)
       (rhs WAE?)]
  [sub (lhs WAE?)
       (rhs WAE?)]
  [id (name symbol?)]
  [with (name symbol?) (named-expr WAE?) (named-body WAE?)])

;; parse : sexp --> WAE
;; To convert s-expressions into WAEs
(define (parse sexp)
  (cond
    [(number? sexp) (num sexp)]
    [(symbol? sexp) (id sexp)]
    [(list? sexp)
     (case (first sexp)
       [(+) (if (= (length sexp) 3)
                (add (parse (second sexp))
                     (parse (third sexp)))
                (error 'parse "add: expected 2 args"))]
       [(-) (if (= (length sexp) 3)
                (sub (parse (second sexp))
                     (parse (third sexp)))
                (error 'parse "sub: expected 2 args"))]
       [(with) (with (first (second sexp))
                     (parse (second (second sexp)))
                     (parse (third sexp)))]
       [else (error 'parse "invalid expression")])]
    [else (error 'parse "invalid expression")]))

;; subst : WAE symbol WAE --> WAE
;; substitutes second argument with third argument in first argument,
;; as per the rules of substitution; the resulting expression contains
;; no free instances of the second argument.
(define (subst expr sub-id val)
  (type-case WAE expr
    [num (n) expr]
    [add (lhs rhs) (add (subst lhs sub-id val)
                        (subst rhs sub-id val))]
    [sub (lhs rhs) (sub (subst lhs sub-id val)
                        (subst rhs sub-id val))]
    [with (bound-id named-expr bound-body)
          (if (symbol=? bound-id sub-id)
              (with bound-id
                    (subst named-expr sub-id val)
                    bound-body)
              (with bound-id
                    (subst named-expr sub-id val)
                    (subst bound-body sub-id val)))]
    [id (v) (if (symbol=? v sub-id) val expr)]))

;; calc : WAE --> number
;; consumes a WAE and computes the corresponding number
(define (calc an-ae)
  (type-case WAE an-ae
    [num (n) n]
    [add (lhs rhs) (+ (calc lhs) (calc rhs))]
    [sub (lhs rhs) (- (calc lhs) (calc rhs))]
    [with (bound-id named-expr bound-body) 
          (calc (subst bound-body 
                       bound-id 
                       (num (calc named-expr))))]
    [id (v) (error 'calc "free identifier")]))

(test (calc (parse '3)) 3)
(test (calc (parse '{+ 3 4})) 7)
(test (calc (parse '{+ {- 3 4} 7})) 6)
; etc... the rest of the tests omited

The following is a translation of the above program into Python,
as best as I could. I have not included the simple s-expression
parser that I had to write as a front-end, but I'll post if if
anyone's interested. The type-case is replaced with a class
hierarchy, resulting in some extra syntax. Most of the docstrings
and tests have been omited as irrelevant.

import sexp
import operator

class Wae(object):
    """ An abstract base class for representing the abstract syntax tree of a
    Wae expression.
       
    """
    def calc(self):
        raise NotImplementedError

class Num(Wae):
    """ This subclass represents numbers.

    """
    def __init__(self, number):
        self.number = number
    def __repr__(self):
        return '<Num %d>' % self.number
    def subst_(self, sub_id, value):
        return self
    def calc(self):
        return self.number

class Id(Wae):
    """ This subclass represents an identifier.

    """
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return '<Id \'%s\'>' % self.name
    def subst_(self, sub_id, value):
        if self.name == sub_id:
            return value
        return self
    def calc(self):
        raise SyntaxError('Free identifier '+self.name)

class BinOp(Wae):
    """ This abstract class represents binary operations.

    """
    def __init__(self, lhs, rhs):
        self.lhs = lhs
        self.rhs = rhs
    def __repr__(self):
        return '<%s %r %r>' % (self.__class__.__name__, self.lhs, self.rhs)
    def subst_(self, sub_id, value):
        return self.__class__(self.lhs.subst_(sub_id, value),
                self.rhs.subst_(sub_id, value))
    def calc(self):
        return self.op(self.lhs.calc(), self.rhs.calc())

class Add(BinOp):
    """ This subclass represents an addition expression.

    """
    def __init__(self, lhs, rhs):
        super(Add, self).__init__(lhs, rhs)
        self.op = operator.add

class Sub(BinOp):
    """ This subclass represents a substraction expression.

    """
    def __init__(self, lhs, rhs):
        super(Sub, self).__init__(lhs, rhs)
        self.op = operator.sub

class With(Wae):
    """ This subclass represents the Wae binding expression, 'with'.

    """
    def __init__(self, bound_id, named_expr, bound_body):
        self.bound_id = bound_id
        self.named_expr = named_expr
        self.bound_body = bound_body
    def __repr__(self):
        return '<With \'%s\' %r %r>' % (self.bound_id, 
                self.named_expr, self.bound_body)
    def calc(self):
        return self.bound_body.subst_(
                self.bound_id, Num(self.named_expr.calc())).calc()
    def subst_(self, sub_id, value):
        if sub_id == self.bound_id:
            return With(self.bound_id,
                    self.named_expr.subst_(sub_id, value),
                    self.bound_body)
        else:
            return With(self.bound_id,
                    self.named_expr.subst_(sub_id, value),
                    self.bound_body.subst_(sub_id, value))

# parse : sexp --> Wae

# Wae grammar:
# Wae ::= Num
# Wae ::= ( + <Wae> <Wae> ) 
# Wae ::= ( - <Wae> <Wae> )
# Wae ::= ( with ( Id <Wae> ) <Wae> )
# Wae ::= Id
def parse(expr):
    """ Compile a string representing an WAE expression into an object that
    represents the abstract syntax tree of that expression. The AST provides a
    calc method that returns the result of evaluating the expression.

    >>> parse('(+ 4 5)')
    <Add <Num 4> <Num 5>>

    >>> parse('(+ 4)')
    Traceback (most recent call last):
        ...
    SyntaxError: Ill-formed expression

    >>> parse('(with (x) (+ x x))')
    Traceback (most recent call last):
        ...
    SyntaxError: Ill-formed expression

    >>> parse('3').calc()
    3

    >>> parse('(+ 3 4)').calc()
    7

    >>> parse('(with (x (+ 5 5)) (+ x x))').calc()
    20

    >>> parse('(with (x x) x)').calc()
    Traceback (most recent call last):
        ...
    SyntaxError: Free identifier x

    """
    def parse_ast(ast):
        if isinstance(ast, int):
            return Num(ast)
        elif isinstance(ast, str):
            return Id(ast)
        elif isinstance(ast, tuple):
            op = ast[0]
            if op in ['+', '-'] and len(ast) == 3:
                if op == '+':
                    return Add(parse_ast(ast[1]), parse_ast(ast[2]))
                elif op == '-':
                    return Sub(parse_ast(ast[1]), parse_ast(ast[2]))
            elif op == 'with' and len(ast) == 3 and len(ast[1]) == 2:
                return With(ast[1][0], parse_ast(ast[1][1]), parse_ast(ast[2]))
        raise SyntaxError("Ill-formed expression")
    return parse_ast(sexp.read(expr))

The sexp parser I wrote returns a tuple that represents the parse
tree of an s-expression, and recognizes only s-expressions,
strings and integers.

How can I make the Python more idiomatic Python?

How can I make it more "beautiful"? A type hierarchy seems
over-engineered in comparison to Scheme's type-case, but I
liked a cascade of isinstance calls (as in parse) even less.
The type hierarchy did allow me to factor out the code
duplication in the (sub ...) and (add ...) types of Scheme, and
seems like a nice benefit over type-case.

-- 
Neil Cerutti



More information about the Python-list mailing list