[pypy-svn] r66597 - in pypy/branch/parser-compiler/pypy/interpreter: astcompiler pyparser test

benjamin at codespeak.net
Fri Jul 24 17:26:31 CEST 2009


Author: benjamin
Date: Fri Jul 24 17:26:29 2009
New Revision: 66597

Modified:
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py
   pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py
   pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
   pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
   pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py
Log:
add lots of comments and docstrings

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py	Fri Jul 24 17:26:29 2009
@@ -11,6 +11,7 @@
 
 
 class Instruction(object):
+    """Represents a single opcode."""
 
     def __init__(self, opcode, arg=0):
         self.opcode = opcode
@@ -19,6 +20,7 @@
         self.has_jump = False
 
     def size(self):
+        """Return the size of bytes of this instruction when it is encoded."""
         if self.opcode >= ops.HAVE_ARGUMENT:
             if self.arg > 0xFFFF:
                 return 6
@@ -28,6 +30,10 @@
             return 1
 
     def jump_to(self, target, absolute=False):
+        """Indicate the target this jump instruction.
+
+        The opcode must be a JUMP opcode.
+        """
         self.jump = (target, absolute)
         self.has_jump = True
 
@@ -45,6 +51,12 @@
 
 
 class Block(object):
+    """A basic control flow block.
+
+    It has one entry point and several possible exit points.  Its instructions
+    may jump to other blocks; if control flow reaches the end of the block, it
+    continues on to next_block.
+    """
 
     def __init__(self):
         self.instructions = []
@@ -65,18 +77,21 @@
         self.marked = True
 
     def post_order(self):
+        """Return this block and its children in post order."""
         blocks = []
         self._post_order(blocks)
         blocks.reverse()
         return blocks
 
     def code_size(self):
+        """Return the encoded size of all the instructions in this block."""
         i = 0
         for instr in self.instructions:
             i += instr.size()
         return i
 
     def get_code(self):
+        """Encode the instructions in this block into bytecode."""
         code = []
         for instr in self.instructions:
             opcode = instr.opcode
@@ -115,6 +130,7 @@
 
 
 class PythonCodeMaker(ast.ASTVisitor):
+    """Knows how to assemble a PyCode object."""
 
     def __init__(self, space, name, first_lineno, scope, compile_info):
         self.space = space
@@ -138,10 +154,12 @@
         return Block()
 
     def use_block(self, block):
+        """Start emitting bytecode into block."""
         self.current_block = block
         self.instrs = block.instructions
 
     def use_next_block(self, block=None):
+        """Set this block as the next_block for the last and use it."""
         if block is None:
             block = self.new_block()
         self.current_block.next_block = block
@@ -149,6 +167,7 @@
         return block
 
     def emit_op(self, op):
+        """Emit an opcode without an argument."""
         instr = Instruction(op)
         if not self.lineno_set:
             instr.lineno = self.lineno
@@ -159,6 +178,7 @@
         return instr
 
     def emit_op_arg(self, op, arg):
+        """Emit an opcode with an integer argument."""
         instr = Instruction(op, arg)
         if not self.lineno_set:
             instr.lineno = self.lineno
@@ -166,12 +186,15 @@
         self.instrs.append(instr)
 
     def emit_op_name(self, op, container, name):
+        """Emit an opcode referencing a name."""
         self.emit_op_arg(op, self.add_name(container, name))
 
     def emit_jump(self, op, block_to, absolute=False):
+        """Emit a jump opcode to another block."""
         self.emit_op(op).jump_to(block_to, absolute)
 
     def add_name(self, container, name):
+        """Get the index of a name in container."""
         name = self.scope.mangle(name)
         try:
             index = container[name]
@@ -181,7 +204,10 @@
         return index
 
     def add_const(self, obj, w_key=None):
+        """Add a W_Root to the constant array and return its location."""
         space = self.space
+        # To avoid confusing constants that are equal but of different types
+        # (e.g. 0 and 0.0), the constant's type is part of the dictionary key.
         if w_key is None:
             w_key = space.newtuple([obj, space.type(obj)])
         w_len = space.finditem(self.w_consts, w_key)
@@ -195,17 +221,23 @@
         self.emit_op_arg(ops.LOAD_CONST, index)
 
     def update_position(self, lineno, force=False):
+        """Possibly change the lineno for the next instructions."""
         if force or lineno > self.lineno:
-            if force and lineno < self.lineno:
-                raise AssertionError
             self.lineno = lineno
             self.lineno_set = False
 
     def _resolve_block_targets(self, blocks):
+        """"Compute the arguments of jump instructions."""
         last_extended_arg_count = 0
+        # The reason for this loop is extended jumps.  EXTENDED_ARG extends the
+        # bytecode size, so it might invalidate the offsets we've already given.
+        # Thus we have to loop until the number of extended args is stable.  Any
+        # extended jump at all is extremely rare, so performance is not a
+        # real concern here.
         while True:
             extended_arg_count = 0
             offset = 0
+            # Calculate the code offset of each block.
             for block in blocks:
                 block.offset = offset
                 offset += block.code_size()
@@ -216,6 +248,8 @@
                     if instr.has_jump:
                         target, absolute = instr.jump
                         op = instr.opcode
+                        # Optimize an unconditional jump going to another
+                        # unconditional jump.
                         if op == ops.JUMP_ABSOLUTE or op == ops.JUMP_FORWARD:
                             if target.instructions:
                                 target_op = target.instructions[0].opcode
@@ -236,6 +270,7 @@
                 last_extended_arg_count = extended_arg_count
 
     def _build_consts_array(self):
+        """Turn the applevel constants dictionary into a list."""
         w_consts = self.w_consts
         space = self.space
         consts_w = [space.w_None] * space.int_w(space.len(w_consts))
@@ -253,9 +288,11 @@
         return consts_w
 
     def _get_code_flags(self):
+        """Get an extra flags that should be attached to the code object."""
         raise NotImplementedError
 
     def _stacksize(self, blocks):
+        """Compute co_stacksize."""
         for block in blocks:
             block.marked = False
             block.initial_depth = -1000
@@ -275,6 +312,7 @@
                                                              depth, max_depth)
                 if instr.opcode == ops.JUMP_ABSOLUTE or \
                         instr.opcode == ops.JUMP_FORWARD:
+                    # Nothing more can occur.
                     break
         if block.next_block:
             max_depth = self._recursive_stack_depth_walk(block.next_block,
@@ -283,6 +321,7 @@
         return max_depth
 
     def _build_lnotab(self, blocks):
+        """Build the line number table for tracebacks and tracing."""
         current_line = self.first_lineno
         current_off = 0
         table = []
@@ -323,11 +362,14 @@
         return ''.join(table)
 
     def assemble(self):
+        """Build a PyCode object."""
+        # Unless it's interactive, every code object must end with a return.
         if not self.current_block.have_return:
             self.use_next_block()
             if self.add_none_to_final_return:
                 self.load_const(self.space.w_None)
             self.emit_op(ops.RETURN_VALUE)
+        # Set the first lineno if it is not already explicitly set.
         if self.first_lineno == -1:
             if self.first_block.instructions:
                 self.first_lineno = self.first_block.instructions[0].lineno
@@ -555,8 +597,10 @@
 
 
 def _opcode_stack_effect(op, arg):
+    """Return the stack effect of a opcode an its argument."""
     if we_are_translated():
         for possible_op in ops.unrolling_opcode_descs:
+            # EXTENDED_ARG should never get in here.
             if possible_op.index == ops.EXTENDED_ARG:
                 continue
             if op == possible_op.index:
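
An aside on the add_const comment above: keying constants on both value and type
is what keeps, say, 0, 0.0 and False from sharing a co_consts slot.  A minimal
application-level sketch of the idea (plain Python, not the wrapped W_Root
version in this file):

    def add_const(consts, obj):
        # Key on both the value and its exact type, mirroring the
        # space.newtuple([obj, space.type(obj)]) key used above.
        key = (obj, type(obj))
        if key not in consts:
            consts[key] = len(consts)
        return consts[key]

    consts = {}
    assert add_const(consts, 0) != add_const(consts, 0.0)
    assert add_const(consts, 0) != add_const(consts, False)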

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py	Fri Jul 24 17:26:29 2009
@@ -6,8 +6,9 @@
 from pypy.rlib.objectmodel import specialize
 
 
-def ast_from_node(space, n, compile_info):
-    return ASTBuilder(space, n, compile_info).build_ast()
+def ast_from_node(space, node, compile_info):
+    """Turn a parse tree, node, to AST."""
+    return ASTBuilder(space, node, compile_info).build_ast()
 
 
 augassign_operator_map = {
@@ -48,6 +49,7 @@
         self.root_node = n
 
     def build_ast(self):
+        """Convert an top level parse tree node into an AST mod."""
         n = self.root_node
         if n.type == syms.file_input:
             stmts = []
@@ -88,6 +90,7 @@
             raise AssertionError("unknown root node")
 
     def number_of_statements(self, n):
+        """Compute the number of AST statements contained in a node."""
         stmt_type = n.type
         if stmt_type == syms.compound_stmt:
             return 1
@@ -100,10 +103,12 @@
             raise AssertionError("non-statement node")
 
     def error(self, msg, n):
+        """Raise a SyntaxError with the lineno and column set to n's."""
         raise SyntaxError(msg, n.lineno, n.column,
                           filename=self.compile_info.filename)
 
     def check_forbidden_name(self, name, node):
+        """Raise an error if the name cannot be assigned to."""
         if name == "None":
             self.error("assignment to None", node)
         if name == "__debug__":
@@ -111,6 +116,7 @@
         # XXX Warn about using True and False
 
     def set_context(self, expr, ctx, node):
+        """Set the context of an expression to Store or Del if possible."""
         error = None
         sequence = None
         if isinstance(expr, ast.Attribute):
@@ -227,6 +233,8 @@
             if import_name_type == syms.import_as_name:
                 name = import_name.children[0].value
                 if len(import_name.children) == 3:
+                    # 'as' is not yet a keyword in Python 2.5, so the grammar
+                    # just specifies a NAME token.  We check it manually here.
                     if import_name.children[1].value != "as":
                         self.error("must use 'as' in import", import_name)
                     as_name = import_name.children[2].value
@@ -492,6 +500,7 @@
         return ast.With(test, target, body, with_node.lineno, with_node.column)
 
     def handle_with_var(self, with_var_node):
+        # The grammar doesn't require 'as', so check it manually.
         if with_var_node.children[0].value != "as":
             self.error("expected \"with [expr] as [var]\"", with_var_node)
         return self.handle_expr(with_var_node.children[1])
@@ -870,7 +879,7 @@
         return result
 
     def handle_factor(self, factor_node):
-        # Fold '-' on constants.
+        # Fold '-' on constant numbers.
         if factor_node.children[0].type == tokens.MINUS and \
                 len(factor_node.children) == 2:
             factor = factor_node.children[1]
@@ -1100,6 +1109,7 @@
             encoding = self.compile_info.encoding
             sub_strings_w = [parsestring.parsestr(space, encoding, s.value)
                              for s in atom_node.children]
+            # This implements implicit string concatenation.
             if len(sub_strings_w) > 1:
                 w_sub_strings = space.newlist(sub_strings_w)
                 w_join = space.getattr(space.wrap(""), space.wrap("join"))
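
The implicit string concatenation comment above can be checked directly with
plain CPython (this illustrates the language feature, not the AST builder
itself): adjacent literals are merged before bytecode is produced, so only the
joined string ends up in co_consts.

    import dis

    code = compile('x = "foo" "bar"', "<example>", "exec")
    assert "foobar" in code.co_consts
    dis.dis(code)  # a single LOAD_CONST 'foobar', no runtime concatenation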

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py	Fri Jul 24 17:26:29 2009
@@ -2,6 +2,11 @@
 Generate Python bytecode from a Abstract Syntax Tree.
 """
 
+# NOTE TO READERS: All the ugly and "obvious" isinstance assertions here are to
+# help the annotator.  To it, unfortunately, everything is not so obvious.  If
+# you figure out a way to remove them, great, but try a translation first,
+# please.
+
 from pypy.interpreter.astcompiler import (ast2 as ast, assemble, symtable,
                                           consts, misc)
 from pypy.interpreter.astcompiler import optimize, asthelpers # For side effects
@@ -13,6 +18,7 @@
 
 
 def compile_ast(space, module, info):
+    """Generate a code object from AST."""
     symbols = symtable.SymtableBuilder(space, module, info)
     return TopLevelCodeGenerator(space, module, symbols, info).assemble()
 
@@ -106,6 +112,7 @@
 })
 
 
+# These are frame blocks.
 F_BLOCK_LOOP = 0
 F_BLOCK_EXCEPT = 1
 F_BLOCK_FINALLY = 2
@@ -113,6 +120,11 @@
 
 
 class PythonCodeGenerator(assemble.PythonCodeMaker):
+    """Base code generator.
+
+    A subclass of this is created for every scope to be compiled.  It walks
+    the AST tree, generating bytecode as needed.
+    """
 
     def __init__(self, space, name, tree, lineno, symbols, compile_info):
         self.scope = symbols.find_scope(tree)
@@ -125,15 +137,21 @@
         self._compile(tree)
 
     def _compile(self, tree):
+        """Override in subclasses to compile a scope."""
         raise NotImplementedError
 
     def current_temporary_name(self):
+        """Return the name of the current temporary variable.
+
+        This must be in sync with the one during symbol table building.
+        """
         name = "_[%d]" % (self.temporary_name_counter,)
         self.temporary_name_counter += 1
         assert self.scope.lookup(name) != symtable.SCOPE_UNKNOWN
         return name
 
     def sub_scope(self, kind, name, node, lineno):
+        """Convenience function for compiling a sub scope."""
         generator = kind(self.space, name, node, lineno, self.symbols,
                          self.compile_info)
         return generator.assemble()
@@ -151,6 +169,7 @@
                           filename=self.compile_info.filename)
 
     def name_op(self, identifier, ctx):
+        """Generate an operation appropiate for the scope of the identifier."""
         scope = self.scope.lookup(identifier)
         op = ops.NOP
         container = self.names
@@ -182,9 +201,11 @@
         return isinstance(node, ast.Expr) and isinstance(node.value, ast.Str)
 
     def _get_code_flags(self):
+        # Default for everything but module scopes.
         return consts.CO_NEWLOCALS
 
     def _handle_body(self, body):
+        """Compile a list of statements, handling doc strings if needed."""
         if body:
             start = 0
             if self.is_docstring(body[0]):
@@ -212,8 +233,10 @@
         mod.body.walkabout(self)
 
     def _make_function(self, code, num_defaults=0):
+        """Emit the opcodes to turn a code object into a function."""
         code_index = self.add_const(code)
         if code.co_freevars:
+            # Load cell and free vars to pass on.
             for free in code.co_freevars:
                 free_scope = self.scope.lookup(free)
                 if free_scope == symtable.SCOPE_CELL:
@@ -230,6 +253,7 @@
 
     def visit_FunctionDef(self, func):
         self.update_position(func.lineno, True)
+        # Load decorators first, but apply them after the function is created.
         if func.decorators:
             self.visit_sequence(func.decorators)
         if func.args.defaults:
@@ -240,6 +264,7 @@
         code = self.sub_scope(FunctionCodeGenerator, func.name, func,
                               func.lineno)
         self._make_function(code, num_defaults)
+        # Apply decorators.
         if func.decorators:
             for i in range(len(func.decorators)):
                 self.emit_op_arg(ops.CALL_FUNCTION, 1)
@@ -403,6 +428,7 @@
         if not self.frame_blocks:
             self.error("'continue' not properly in loop", cont)
         current_block, block = self.frame_blocks[-1]
+        # Continue cannot be in a finally block.
         if current_block == F_BLOCK_LOOP:
             self.emit_jump(ops.JUMP_ABSOLUTE, block, True)
         elif current_block == F_BLOCK_EXCEPT or \
@@ -524,6 +550,7 @@
         self.visit_sequence(tf.body)
         self.emit_op(ops.POP_BLOCK)
         self.pop_frame_block(F_BLOCK_FINALLY, body)
+        # Indicates there was no exception.
         self.load_const(self.space.w_None)
         self.use_next_block(end)
         self.push_frame_block(F_BLOCK_FINALLY_END, end)
@@ -559,6 +586,8 @@
             self.load_const(self.space.wrap(level))
             self.load_const(self.space.w_None)
             self.emit_op_name(ops.IMPORT_NAME, self.names, alias.name)
+            # If there's no asname then we store the root module.  If there is
+            # an asname, _import_as stores the last module of the chain into it.
             if alias.asname:
                 self._import_as(alias)
             else:
@@ -575,6 +604,7 @@
         first = imp.names[0]
         assert isinstance(first, ast.alias)
         star_import = len(imp.names) == 1 and first.name == "*"
+        # Various error checking for future imports.
         if imp.module == "__future__":
             last_line, last_offset = self.compile_info.last_future_import
             if imp.lineno > last_line or \
@@ -603,6 +633,7 @@
         if imp.module:
             mod_name = imp.module
         else:
+            # In the case of a relative import.
             mod_name = ""
         self.emit_op_name(ops.IMPORT_NAME, self.names, mod_name)
         if star_import:
@@ -630,6 +661,7 @@
             assign.targets[i].walkabout(self)
 
     def _optimize_unpacking(self, assign):
+        """Try to optimize out BUILD_TUPLE and UNPACK_SEQUENCE opcodes."""
         if len(assign.targets) != 1:
             return False
         targets = assign.targets[0].as_node_list(self.space)
@@ -745,6 +777,7 @@
         if self.interactive:
             expr.value.walkabout(self)
             self.emit_op(ops.PRINT_EXPR)
+        # Only compile if the expression isn't constant.
         elif not expr.value.constant:
             expr.value.walkabout(self)
             self.emit_op(ops.POP_TOP)
@@ -1230,6 +1263,7 @@
 
     def _compile(self, func):
         assert isinstance(func, ast.FunctionDef)
+        # If there's a docstring, store it as the first constant.
         if self.is_docstring(func.body[0]):
             doc_string = func.body[0]
             assert isinstance(doc_string, ast.Expr)
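
For the _make_function comment about loading cell and free variables: plain
CPython exposes the same split via co_cellvars and co_freevars, which is what
decides between a plain function and a closure.  An illustration using standard
CPython introspection on a modern interpreter (not the PyPy generator):

    def outer():
        x = 1
        def inner():
            return x   # 'x' is free here, a cell in outer()
        return inner

    inner = outer()
    assert outer.__code__.co_cellvars == ("x",)
    assert inner.__code__.co_freevars == ("x",)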

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py	Fri Jul 24 17:26:29 2009
@@ -16,6 +16,10 @@
 del app
 
 def syntax_warning(space, msg, fn, lineno, offset):
+    """Raise an applevel SyntaxWarning.
+
+    If the user has set this warning to raise an error, a SyntaxError will be
+    raised."""
     w_msg = space.wrap(msg)
     w_filename = space.wrap(fn)
     w_lineno = space.wrap(lineno)
@@ -24,6 +28,7 @@
 
 
 def dict_to_switch(d):
+    """Convert of dictionary with integer keys to a switch statement."""
     def lookup(query):
         if we_are_translated():
             for key, value in unrolling_iteritems:
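
A rough, untranslated sketch of what dict_to_switch is getting at (the
unrolling_iteritems / we_are_translated machinery is elided, and the names
below are illustrative only): the dictionary becomes a closed set of
comparisons that the translator can turn into a switch.

    def dict_to_switch_sketch(d):
        items = list(d.items())   # stand-in for the unrolling iterable

        def lookup(query):
            # After translation the real helper unrolls this loop into a
            # chain of equality checks, i.e. a switch statement.
            for key, value in items:
                if key == query:
                    return value
            raise KeyError(query)
        return lookup

    lookup = dict_to_switch_sketch({0: "STOP_CODE", 1: "POP_TOP"})
    assert lookup(1) == "POP_TOP"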

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py	Fri Jul 24 17:26:29 2009
@@ -1,3 +1,4 @@
+"""codegen helpers and AST constant folding."""
 import sys
 import itertools
 
@@ -19,9 +20,11 @@
 class __extend__(ast.AST):
 
     def as_constant_truth(self, space):
+        """Return the truth of this node if known."""
         raise AssertionError("only for expressions")
 
     def as_constant(self):
+        """Return the value of this node as a wrapped constant if possible."""
         raise AssertionError("only for expressions")
 
     def accept_jump_if(self, gen, condition, target):
@@ -149,6 +152,7 @@
 
 
 class OptimizingVisitor(ast.ASTVisitor):
+    """Constant folds AST."""
 
     def __init__(self, space, compile_info):
         self.space = space
@@ -163,6 +167,9 @@
             right = binop.right.as_constant()
             if right is not None:
                 op = binop.op
+                # Can't fold straight division without "from __future__ import
+                # division" because it might be affected at runtime by the -Q
+                # flag.
                 if op == ast.Div and \
                         not self.compile_info.flags & consts.CO_FUTURE_DIVISION:
                     return binop
@@ -173,9 +180,12 @@
                             break
                     else:
                         raise AssertionError("unknown binary operation")
+                # Let all errors be found at runtime.
                 except OperationError:
                     pass
                 else:
+                    # To avoid blowing up the size of pyc files, we only fold
+                    # reasonably sized sequences.
                     try:
                         w_len = self.space.len(w_const)
                     except OperationError:
@@ -244,12 +254,15 @@
         return rep
 
     def visit_Name(self, name):
+        # Turn loading None into a constant lookup.  Eventually, we can do this
+        # for True and False, too.
         if name.id == "None":
             assert name.ctx == ast.Load
             return ast.Const(self.space.w_None, name.lineno, name.col_offset)
         return name
 
     def visit_Tuple(self, tup):
+        """Try to turn tuple building into a constant."""
         if tup.elts:
             consts_w = [None]*len(tup.elts)
             for i in range(len(tup.elts)):
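
A minimal sketch of the folding policy documented above (not the
OptimizingVisitor itself, and with plain Python objects instead of wrapped
ones; all names here are made up for illustration): fold only constant
operands, refuse plain '/' unless future division is active, swallow errors so
they surface at runtime, and skip results that would bloat the .pyc file.

    import operator

    _OPS = {"+": operator.add, "-": operator.sub, "*": operator.mul,
            "/": operator.truediv, "//": operator.floordiv}

    def fold_binop(op, left, right, future_division=False):
        if op == "/" and not future_division:
            return None           # meaning depends on the -Q runtime flag
        try:
            result = _OPS[op](left, right)
        except Exception:
            return None           # let the error be raised at runtime
        try:
            if len(result) > 20:  # arbitrary cap on folded sequence size
                return None
        except TypeError:
            pass                  # numbers have no len(); always fine
        return result

    assert fold_binop("*", "ab", 3) == "ababab"
    assert fold_binop("/", 1, 2) is None
    assert fold_binop("/", 1, 2, future_division=True) == 0.5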

Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py	Fri Jul 24 17:26:29 2009
@@ -44,16 +44,22 @@
         self.nested = False
 
     def lookup(self, name):
+        """Find the scope of identifier 'name'."""
         return self.symbols.get(self.mangle(name), SCOPE_UNKNOWN)
 
     def lookup_role(self, name):
         return self.roles.get(self.mangle(name), SYM_BLANK)
 
     def new_temporary_name(self):
+        """Return the next temporary name.
+
+        This must be in sync with PythonCodeGenerator's counter.
+        """
         self.note_symbol("_[%d]" % (self.temp_name_counter,), SYM_ASSIGNED)
         self.temp_name_counter += 1
 
     def note_symbol(self, identifier, role):
+        """Record that identifier occurs in this scope."""
         mangled = self.mangle(identifier)
         new_role = role
         if mangled in self.roles:
@@ -69,17 +75,21 @@
         return mangled
 
     def note_yield(self, yield_node):
+        """Called when a yield is found."""
         raise SyntaxError("'yield' outside function", yield_node.lineno,
                           yield_node.col_offset)
 
     def note_return(self, ret):
+        """Called when a return statement is found."""
         raise SyntaxError("return outside function", ret.lineno,
                           ret.col_offset)
 
     def note_exec(self, exc):
+        """Called when an exec statement is found."""
         self.has_exec = True
 
     def note_import_star(self, imp):
+        """Called when a start import is found."""
         pass
 
     def mangle(self, name):
@@ -89,10 +99,12 @@
             return name
 
     def add_child(self, child_scope):
+        """Note a new child scope."""
         child_scope.parent = self
         self.children.append(child_scope)
 
     def _finalize_name(self, name, flags, local, bound, free, globs):
+        """Decide on the scope of a name."""
         if flags & SYM_GLOBAL:
             if flags & SYM_PARAM:
                 err = "name '%s' is both local and global" % (name,)
@@ -124,11 +136,13 @@
             self.symbols[name] = SCOPE_GLOBAL_IMPLICIT
 
     def _pass_on_bindings(self, local, bound, globs, new_bound, new_globs):
+        """Allow child scopes to see names bound here and in outer scopes."""
         new_globs.update(globs)
         if bound:
             new_bound.update(bound)
 
     def _finalize_cells(self, free):
+        """Hook for FunctionScope."""
         pass
 
     def _check_optimization(self):
@@ -137,6 +151,7 @@
     _hide_bound_from_nested_scopes = False
 
     def finalize(self, bound, free, globs):
+        """Enter final bookeeping data in to self.symbols."""
         self.symbols = {}
         local = {}
         new_globs = {}
@@ -150,6 +165,8 @@
             self._pass_on_bindings(local, bound, globs, new_bound, new_globs)
         child_frees = {}
         for child in self.children:
+            # Symbol dictionaries are copied to avoid having child scopes
+            # pollute each other's.
             child_free = new_free.copy()
             child.finalize(new_bound.copy(), child_free, new_globs.copy())
             child_frees.update(child_free)
@@ -279,6 +296,7 @@
 
 
 class SymtableBuilder(ast.GenericASTVisitor):
+    """Find symbol information from AST."""
 
     def __init__(self, space, module, compile_info):
         self.space = space
@@ -300,6 +318,7 @@
         assert not self.stack
 
     def push_scope(self, scope, node):
+        """Push a child scope."""
         if self.stack:
             self.stack[-1].add_child(scope)
         self.stack.append(scope)
@@ -315,13 +334,16 @@
             self.scope = None
 
     def find_scope(self, scope_node):
+        """Lookup the scope for a given AST node."""
         return self.scopes[scope_node]
 
     def implicit_arg(self, pos):
+        """Note a implicit arg for implicit tuple unpacking."""
         name = ".%d" % (pos,)
         self.note_symbol(name, SYM_PARAM)
 
     def note_symbol(self, identifier, role):
+        """Note the identifer on the current scope."""
         mangled = self.scope.note_symbol(identifier, role)
         if role & SYM_GLOBAL:
             if mangled in self.globs:
@@ -330,6 +352,7 @@
 
     def visit_FunctionDef(self, func):
         self.note_symbol(func.name, SYM_ASSIGNED)
+        # Function defaults and decorators happen in the outer scope.
         if func.args.defaults:
             self.visit_sequence(func.args.defaults)
         if func.decorators:
@@ -449,6 +472,8 @@
             if isinstance(arg, ast.Name):
                 self.note_symbol(arg.id, SYM_PARAM)
             elif isinstance(arg, ast.Tuple):
+                # Tuple unpacking in the argument list.  Add a secret variable
+                # name to receive the tuple with.
                 if is_toplevel:
                     self.implicit_arg(i)
             else:
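
The scope decisions described above (local, global, free, cell) are the same
questions CPython's own symtable module answers, which makes it a handy way to
see the intended results.  This illustrates the concept, not SymtableBuilder
itself:

    import symtable

    src = ("def outer():\n"
           "    x = 1\n"
           "    def inner():\n"
           "        return x\n")
    top = symtable.symtable(src, "<example>", "exec")
    outer = top.get_children()[0]
    inner = outer.get_children()[0]
    assert outer.lookup("x").is_local()   # bound (and a cell) in outer()
    assert inner.lookup("x").is_free()    # closed over from outer()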

Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py	Fri Jul 24 17:26:29 2009
@@ -89,6 +89,10 @@
         self.stack = None
 
     def prepare(self, start=-1):
+        """Setup the parser for parsing.
+
+        Takes the starting symbol as an argument.
+        """
         if start == -1:
             start = self.grammar.start
         self.root = None
@@ -106,28 +110,37 @@
             for i, next_state in arcs:
                 sym_id = self.grammar.labels[i]
                 if label_index == i:
+                    # We matched a terminal (a token).
                     self.shift(next_state, token_type, value, lineno, column)
                     state = states[next_state]
+                    # While the only possible action is to accept, pop nodes off
+                    # the stack.
                     while state[1] and not state[0]:
                         self.pop()
                         if not self.stack:
+                            # Parsing is done.
                             return True
                         dfa, state_index, node = self.stack[-1]
                         state = dfa[0][state_index]
                     return False
                 elif sym_id >= 256:
                     sub_node_dfa = self.grammar.dfas[sym_id]
+                    # Check if this token can start a child node.
                     if label_index in sub_node_dfa[1]:
                         self.push(sub_node_dfa, next_state, sym_id, lineno,
                                   column)
                         break
             else:
+                # We failed to find any arcs to another state, so unless this
+                # state is accepting, it's invalid input.
                 if is_accepting:
                     self.pop()
                     if not self.stack:
                         raise ParseError("too much input", token_type, value,
                                          lineno, column, line)
                 else:
+                    # If only one possible input would satisfy, attach it to the
+                    # error.
                     if len(arcs) == 1:
                         expected = sym_id
                     else:
@@ -136,6 +149,7 @@
                                      column, line, expected)
 
     def classify(self, token_type, value, lineno, column, line):
+        """Find the label for a token."""
         if token_type == self.grammar.KEYWORD_TOKEN:
             label_index = self.grammar.keyword_ids.get(value, -1)
             if label_index != -1:
@@ -147,18 +161,21 @@
         return label_index
 
     def shift(self, next_state, token_type, value, lineno, column):
+        """Shift a non-terminal and prepare for the next state."""
         dfa, state, node = self.stack[-1]
         new_node = Node(token_type, value, None, lineno, column)
         node.children.append(new_node)
         self.stack[-1] = (dfa, next_state, node)
 
     def push(self, next_dfa, next_state, node_type, lineno, column):
+        """Push a terminal and adjust the current state."""
         dfa, state, node = self.stack[-1]
         new_node = Node(node_type, None, [], lineno, column)
         self.stack[-1] = (dfa, next_state, node)
         self.stack.append((next_dfa, 0, new_node))
 
     def pop(self):
+        """Pop an entry off the stack and make its node a child of the last."""
         dfa, state, node = self.stack.pop()
         if self.stack:
             self.stack[-1][2].children.append(node)
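
A toy model of the shift/push/pop discipline documented above (deliberately
simplified; the real stack entries are (dfa, state, node) tuples): push opens a
node for a non-terminal, shift hangs a token under the node on top, and pop
closes the node and attaches it to its parent.

    class Node(object):
        def __init__(self, type_, value=None):
            self.type = type_
            self.value = value
            self.children = []

    stack = [Node("file_input")]

    def push(node_type):
        stack.append(Node(node_type))        # open a new non-terminal

    def shift(token_type, value):
        stack[-1].children.append(Node(token_type, value))  # attach a token

    def pop():
        node = stack.pop()
        if stack:
            stack[-1].children.append(node)  # hang it under its parent
        return node

    push("expr_stmt")
    shift("NAME", "x")
    shift("EQUAL", "=")
    shift("NUMBER", "1")
    pop()
    stmt = stack[0].children[0]
    assert stmt.type == "expr_stmt"
    assert [c.value for c in stmt.children] == ["x", "=", "1"]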

Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py	Fri Jul 24 17:26:29 2009
@@ -57,6 +57,15 @@
 
 
 class CompileInfo(object):
+    """Stores information about the source being compiled.
+
+    * filename: The filename of the source.
+    * mode: The parse mode to use. ('exec', 'eval', or 'single')
+    * flags: Parser and compiler flags.
+    * encoding: The source encoding.
+    * last_future_import: The line number and offset of the last __future__
+      import.
+    """
 
     def __init__(self, filename, mode="exec", flags=0, future_pos=(0, 0)):
         self.filename = filename
@@ -79,7 +88,11 @@
         self.space = space
 
     def parse_source(self, textsrc, compile_info):
-        """Parse a python source according to goal"""
+        """Main entry point for parsing Python source.
+
+        Everything from decoding the source to tokenizing to building the parse
+        tree is handled here.
+        """
         # Detect source encoding.
         enc = None
         if textsrc.startswith("\xEF\xBB\xBF"):
@@ -111,10 +124,18 @@
                     raise
 
         flags = compile_info.flags
+
+        # To avoid raising errors when 'as' or 'with' are used as names in
+        # code that does not explicitly enable the with statement, we have two
+        # grammars: one with 'as' and 'with' as keywords and one without.
+        # This is far better than CPython, where the parser is hacked up to
+        # check for __future__ imports and recognize new keywords accordingly.
         if flags & consts.CO_FUTURE_WITH_STATEMENT:
             self.grammar = pygram.python_grammar
         else:
             self.grammar = pygram.python_grammar_no_with_statement
+
+        # The tokenizer is very picky about how it wants its input.
         source_lines = textsrc.splitlines(True)
         if source_lines and not source_lines[-1].endswith("\n"):
             source_lines[-1] += '\n'
@@ -129,6 +150,8 @@
                 if self.add_token(tp, value, lineno, column, line):
                     break
         except parser.ParseError, e:
+            # Catch parse errors, pretty them up and reraise them as a
+            # SyntaxError.
             new_err = error.IndentationError
             if tp == pygram.tokens.INDENT:
                 msg = "unexpected indent"
@@ -142,6 +165,7 @@
         else:
             tree = self.root
         finally:
+            # Avoid hanging onto the tree.
             self.root = None
         if enc is not None:
             compile_info.encoding = enc
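
A rough sketch of the encoding-detection step mentioned in parse_source (a
simplification of the real logic; the helper name below is made up): a UTF-8
BOM or a PEP 263 coding cookie in the first two lines selects the source
encoding.

    import re

    _COOKIE = re.compile(br"coding[:=]\s*([-\w.]+)")

    def detect_encoding(source_bytes):
        if source_bytes.startswith(b"\xef\xbb\xbf"):
            return "utf-8"
        for line in source_bytes.splitlines()[:2]:
            match = _COOKIE.search(line)
            if match:
                return match.group(1).decode("ascii")
        return None

    assert detect_encoding(b"# -*- coding: latin-1 -*-\nx = 1\n") == "latin-1"
    assert detect_encoding(b"\xef\xbb\xbfprint 1\n") == "utf-8"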

Modified: pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py	Fri Jul 24 17:26:29 2009
@@ -744,6 +744,7 @@
     def test_none_constant(self):
         import opcode
         co = compile("def f(): return None", "<test>", "exec").co_consts[0]
+        assert "None" not in co.co_names
         co = co.co_code
         op = ord(co[0]) + (ord(co[1]) << 8)
         assert op == opcode.opmap["LOAD_CONST"]


