[pypy-svn] r66597 - in pypy/branch/parser-compiler/pypy/interpreter: astcompiler pyparser test
benjamin at codespeak.net
benjamin at codespeak.net
Fri Jul 24 17:26:31 CEST 2009
Author: benjamin
Date: Fri Jul 24 17:26:29 2009
New Revision: 66597
Modified:
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py
Log:
add lots of comments and docstrings
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/assemble.py Fri Jul 24 17:26:29 2009
@@ -11,6 +11,7 @@
class Instruction(object):
+ """Represents a single opcode."""
def __init__(self, opcode, arg=0):
self.opcode = opcode
@@ -19,6 +20,7 @@
self.has_jump = False
def size(self):
+ """Return the size in bytes of this instruction when it is encoded."""
if self.opcode >= ops.HAVE_ARGUMENT:
if self.arg > 0xFFFF:
return 6
@@ -28,6 +30,10 @@
return 1
def jump_to(self, target, absolute=False):
+ """Indicate the target of this jump instruction.
+
+ The opcode must be a JUMP opcode.
+ """
self.jump = (target, absolute)
self.has_jump = True
@@ -45,6 +51,12 @@
class Block(object):
+ """A basic control flow block.
+
+ It has one entry point and several possible exit points. Its instructions
+ may be jumps to other blocks, or if control flow reaches the end of the
+ block, it continues to next_block.
+ """
def __init__(self):
self.instructions = []
@@ -65,18 +77,21 @@
self.marked = True
def post_order(self):
+ """Return this block and its children in post order."""
blocks = []
self._post_order(blocks)
blocks.reverse()
return blocks
def code_size(self):
+ """Return the encoded size of all the instructions in this block."""
i = 0
for instr in self.instructions:
i += instr.size()
return i
def get_code(self):
+ """Encode the instructions in this block into bytecode."""
code = []
for instr in self.instructions:
opcode = instr.opcode
@@ -115,6 +130,7 @@
class PythonCodeMaker(ast.ASTVisitor):
+ """Knows how to assemble a PyCode object."""
def __init__(self, space, name, first_lineno, scope, compile_info):
self.space = space
@@ -138,10 +154,12 @@
return Block()
def use_block(self, block):
+ """Start emitting bytecode into block."""
self.current_block = block
self.instrs = block.instructions
def use_next_block(self, block=None):
+ """Set this block as the next_block for the last and use it."""
if block is None:
block = self.new_block()
self.current_block.next_block = block
@@ -149,6 +167,7 @@
return block
def emit_op(self, op):
+ """Emit an opcode without an argument."""
instr = Instruction(op)
if not self.lineno_set:
instr.lineno = self.lineno
@@ -159,6 +178,7 @@
return instr
def emit_op_arg(self, op, arg):
+ """Emit an opcode with an integer argument."""
instr = Instruction(op, arg)
if not self.lineno_set:
instr.lineno = self.lineno
@@ -166,12 +186,15 @@
self.instrs.append(instr)
def emit_op_name(self, op, container, name):
+ """Emit an opcode referencing a name."""
self.emit_op_arg(op, self.add_name(container, name))
def emit_jump(self, op, block_to, absolute=False):
+ """Emit a jump opcode to another block."""
self.emit_op(op).jump_to(block_to, absolute)
def add_name(self, container, name):
+ """Get the index of a name in container."""
name = self.scope.mangle(name)
try:
index = container[name]
@@ -181,7 +204,10 @@
return index
def add_const(self, obj, w_key=None):
+ """Add a W_Root to the constant array and return its location."""
space = self.space
+ # To avoid confusing equal but separate types, we also store the type of
+ # the constant in the dictionary key.
if w_key is None:
w_key = space.newtuple([obj, space.type(obj)])
w_len = space.finditem(self.w_consts, w_key)
@@ -195,17 +221,23 @@
self.emit_op_arg(ops.LOAD_CONST, index)
def update_position(self, lineno, force=False):
+ """Possibly change the lineno for the next instructions."""
if force or lineno > self.lineno:
- if force and lineno < self.lineno:
- raise AssertionError
self.lineno = lineno
self.lineno_set = False
def _resolve_block_targets(self, blocks):
+ """Compute the arguments of jump instructions."""
last_extended_arg_count = 0
+ # The reason for this loop is extended jumps. EXTENDED_ARG extends the
+ # bytecode size, so it might invalidate the offsets we've already given.
+ # Thus we have to loop until the number of extended args is stable. Any
+ # extended jump at all is extremely rare, so performance is not too
+ # concerning.
while True:
extended_arg_count = 0
offset = 0
+ # Calculate the code offset of each block.
for block in blocks:
block.offset = offset
offset += block.code_size()
@@ -216,6 +248,8 @@
if instr.has_jump:
target, absolute = instr.jump
op = instr.opcode
+ # Optimize an unconditional jump going to another
+ # unconditional jump.
if op == ops.JUMP_ABSOLUTE or op == ops.JUMP_FORWARD:
if target.instructions:
target_op = target.instructions[0].opcode
@@ -236,6 +270,7 @@
last_extended_arg_count = extended_arg_count
def _build_consts_array(self):
+ """Turn the applevel constants dictionary into a list."""
w_consts = self.w_consts
space = self.space
consts_w = [space.w_None] * space.int_w(space.len(w_consts))
@@ -253,9 +288,11 @@
return consts_w
def _get_code_flags(self):
+ """Get the extra flags that should be attached to the code object."""
raise NotImplementedError
def _stacksize(self, blocks):
+ """Compute co_stacksize."""
for block in blocks:
block.marked = False
block.initial_depth = -1000
@@ -275,6 +312,7 @@
depth, max_depth)
if instr.opcode == ops.JUMP_ABSOLUTE or \
instr.opcode == ops.JUMP_FORWARD:
+ # Nothing more can occur.
break
if block.next_block:
max_depth = self._recursive_stack_depth_walk(block.next_block,
@@ -283,6 +321,7 @@
return max_depth
def _build_lnotab(self, blocks):
+ """Build the line number table for tracebacks and tracing."""
current_line = self.first_lineno
current_off = 0
table = []
@@ -323,11 +362,14 @@
return ''.join(table)
def assemble(self):
+ """Build a PyCode object."""
+ # Unless it's interactive, every code object must end in a return.
if not self.current_block.have_return:
self.use_next_block()
if self.add_none_to_final_return:
self.load_const(self.space.w_None)
self.emit_op(ops.RETURN_VALUE)
+ # Set the first lineno if it is not already explicitly set.
if self.first_lineno == -1:
if self.first_block.instructions:
self.first_lineno = self.first_block.instructions[0].lineno
@@ -555,8 +597,10 @@
def _opcode_stack_effect(op, arg):
+ """Return the stack effect of an opcode and its argument."""
if we_are_translated():
for possible_op in ops.unrolling_opcode_descs:
+ # EXTENDED_ARG should never get in here.
if possible_op.index == ops.EXTENDED_ARG:
continue
if op == possible_op.index:
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/astbuilder.py Fri Jul 24 17:26:29 2009
@@ -6,8 +6,9 @@
from pypy.rlib.objectmodel import specialize
-def ast_from_node(space, n, compile_info):
- return ASTBuilder(space, n, compile_info).build_ast()
+def ast_from_node(space, node, compile_info):
+ """Turn a parse tree, node, into an AST."""
+ return ASTBuilder(space, node, compile_info).build_ast()
augassign_operator_map = {
@@ -48,6 +49,7 @@
self.root_node = n
def build_ast(self):
+ """Convert a top level parse tree node into an AST mod."""
n = self.root_node
if n.type == syms.file_input:
stmts = []
@@ -88,6 +90,7 @@
raise AssertionError("unknown root node")
def number_of_statements(self, n):
+ """Compute the number of AST statements contained in a node."""
stmt_type = n.type
if stmt_type == syms.compound_stmt:
return 1
@@ -100,10 +103,12 @@
raise AssertionError("non-statement node")
def error(self, msg, n):
+ """Raise a SyntaxError with the lineno and column set to n's."""
raise SyntaxError(msg, n.lineno, n.column,
filename=self.compile_info.filename)
def check_forbidden_name(self, name, node):
+ """Raise an error if the name cannot be assigned to."""
if name == "None":
self.error("assignment to None", node)
if name == "__debug__":
@@ -111,6 +116,7 @@
# XXX Warn about using True and False
def set_context(self, expr, ctx, node):
+ """Set the context of an expression to Store or Del if possible."""
error = None
sequence = None
if isinstance(expr, ast.Attribute):
@@ -227,6 +233,8 @@
if import_name_type == syms.import_as_name:
name = import_name.children[0].value
if len(import_name.children) == 3:
+ # 'as' is not yet a keyword in Python 2.5, so the grammar
+ # just specifies a NAME token. We check it manually here.
if import_name.children[1].value != "as":
self.error("must use 'as' in import", import_name)
as_name = import_name.children[2].value
@@ -492,6 +500,7 @@
return ast.With(test, target, body, with_node.lineno, with_node.column)
def handle_with_var(self, with_var_node):
+ # The grammar doesn't require 'as', so check it manually.
if with_var_node.children[0].value != "as":
self.error("expected \"with [expr] as [var]\"", with_var_node)
return self.handle_expr(with_var_node.children[1])
@@ -870,7 +879,7 @@
return result
def handle_factor(self, factor_node):
- # Fold '-' on constants.
+ # Fold '-' on constant numbers.
if factor_node.children[0].type == tokens.MINUS and \
len(factor_node.children) == 2:
factor = factor_node.children[1]
@@ -1100,6 +1109,7 @@
encoding = self.compile_info.encoding
sub_strings_w = [parsestring.parsestr(space, encoding, s.value)
for s in atom_node.children]
+ # This implements implicit string concatenation.
if len(sub_strings_w) > 1:
w_sub_strings = space.newlist(sub_strings_w)
w_join = space.getattr(space.wrap(""), space.wrap("join"))
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/codegen.py Fri Jul 24 17:26:29 2009
@@ -2,6 +2,11 @@
Generate Python bytecode from a Abstract Syntax Tree.
"""
+# NOTE TO READERS: All the ugly and "obvious" isinstance assertions here are to
+# help the annotator. To it, unfortunately, everything is not so obvious. If
+# you figure out a way to remove them, great, but try a translation first,
+# please.
+
from pypy.interpreter.astcompiler import (ast2 as ast, assemble, symtable,
consts, misc)
from pypy.interpreter.astcompiler import optimize, asthelpers # For side effects
@@ -13,6 +18,7 @@
def compile_ast(space, module, info):
+ """Generate a code object from AST."""
symbols = symtable.SymtableBuilder(space, module, info)
return TopLevelCodeGenerator(space, module, symbols, info).assemble()
@@ -106,6 +112,7 @@
})
+# These are frame blocks.
F_BLOCK_LOOP = 0
F_BLOCK_EXCEPT = 1
F_BLOCK_FINALLY = 2
@@ -113,6 +120,11 @@
class PythonCodeGenerator(assemble.PythonCodeMaker):
+ """Base code generator.
+
+ A subclass of this is created for every scope to be compiled. It walks
+ across the AST tree generating bytecode as needed.
+ """
def __init__(self, space, name, tree, lineno, symbols, compile_info):
self.scope = symbols.find_scope(tree)
@@ -125,15 +137,21 @@
self._compile(tree)
def _compile(self, tree):
+ """Override in subclasses to compile a scope."""
raise NotImplementedError
def current_temporary_name(self):
+ """Return the name of the current temporary variable.
+
+ This must be in sync with the one during symbol table building.
+ """
name = "_[%d]" % (self.temporary_name_counter,)
self.temporary_name_counter += 1
assert self.scope.lookup(name) != symtable.SCOPE_UNKNOWN
return name
def sub_scope(self, kind, name, node, lineno):
+ """Convenience function for compiling a sub scope."""
generator = kind(self.space, name, node, lineno, self.symbols,
self.compile_info)
return generator.assemble()
@@ -151,6 +169,7 @@
filename=self.compile_info.filename)
def name_op(self, identifier, ctx):
+ """Generate an operation appropriate for the scope of the identifier."""
scope = self.scope.lookup(identifier)
op = ops.NOP
container = self.names
@@ -182,9 +201,11 @@
return isinstance(node, ast.Expr) and isinstance(node.value, ast.Str)
def _get_code_flags(self):
+ # Default for everything but module scopes.
return consts.CO_NEWLOCALS
def _handle_body(self, body):
+ """Compile a list of statements, handling doc strings if needed."""
if body:
start = 0
if self.is_docstring(body[0]):
@@ -212,8 +233,10 @@
mod.body.walkabout(self)
def _make_function(self, code, num_defaults=0):
+ """Emit the opcodes to turn a code object into a function."""
code_index = self.add_const(code)
if code.co_freevars:
+ # Load cell and free vars to pass on.
for free in code.co_freevars:
free_scope = self.scope.lookup(free)
if free_scope == symtable.SCOPE_CELL:
@@ -230,6 +253,7 @@
def visit_FunctionDef(self, func):
self.update_position(func.lineno, True)
+ # Load decorators first, but apply them after the function is created.
if func.decorators:
self.visit_sequence(func.decorators)
if func.args.defaults:
@@ -240,6 +264,7 @@
code = self.sub_scope(FunctionCodeGenerator, func.name, func,
func.lineno)
self._make_function(code, num_defaults)
+ # Apply decorators.
if func.decorators:
for i in range(len(func.decorators)):
self.emit_op_arg(ops.CALL_FUNCTION, 1)
@@ -403,6 +428,7 @@
if not self.frame_blocks:
self.error("'continue' not properly in loop", cont)
current_block, block = self.frame_blocks[-1]
+ # Continue cannot be in a finally block.
if current_block == F_BLOCK_LOOP:
self.emit_jump(ops.JUMP_ABSOLUTE, block, True)
elif current_block == F_BLOCK_EXCEPT or \
@@ -524,6 +550,7 @@
self.visit_sequence(tf.body)
self.emit_op(ops.POP_BLOCK)
self.pop_frame_block(F_BLOCK_FINALLY, body)
+ # Indicates there was no exception.
self.load_const(self.space.w_None)
self.use_next_block(end)
self.push_frame_block(F_BLOCK_FINALLY_END, end)
@@ -559,6 +586,8 @@
self.load_const(self.space.wrap(level))
self.load_const(self.space.w_None)
self.emit_op_name(ops.IMPORT_NAME, self.names, alias.name)
+ # If there's no asname then we store the root module. If there is
+ # an asname, _import_as stores the last module of the chain into it.
if alias.asname:
self._import_as(alias)
else:
@@ -575,6 +604,7 @@
first = imp.names[0]
assert isinstance(first, ast.alias)
star_import = len(imp.names) == 1 and first.name == "*"
+ # Various error checking for future imports.
if imp.module == "__future__":
last_line, last_offset = self.compile_info.last_future_import
if imp.lineno > last_line or \
@@ -603,6 +633,7 @@
if imp.module:
mod_name = imp.module
else:
+ # In the case of a relative import.
mod_name = ""
self.emit_op_name(ops.IMPORT_NAME, self.names, mod_name)
if star_import:
@@ -630,6 +661,7 @@
assign.targets[i].walkabout(self)
def _optimize_unpacking(self, assign):
+ """Try to optimize out BUILD_TUPLE and UNPACK_SEQUENCE opcodes."""
if len(assign.targets) != 1:
return False
targets = assign.targets[0].as_node_list(self.space)
@@ -745,6 +777,7 @@
if self.interactive:
expr.value.walkabout(self)
self.emit_op(ops.PRINT_EXPR)
+ # Only compile if the expression isn't constant.
elif not expr.value.constant:
expr.value.walkabout(self)
self.emit_op(ops.POP_TOP)
@@ -1230,6 +1263,7 @@
def _compile(self, func):
assert isinstance(func, ast.FunctionDef)
+ # If there's a docstring, store it as the first constant.
if self.is_docstring(func.body[0]):
doc_string = func.body[0]
assert isinstance(doc_string, ast.Expr)
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/misc.py Fri Jul 24 17:26:29 2009
@@ -16,6 +16,10 @@
del app
def syntax_warning(space, msg, fn, lineno, offset):
+ """Raise an applevel SyntaxWarning.
+
+ If the user has set this warning to raise an error, a SyntaxError will be
+ raised."""
w_msg = space.wrap(msg)
w_filename = space.wrap(fn)
w_lineno = space.wrap(lineno)
@@ -24,6 +28,7 @@
def dict_to_switch(d):
+ """Convert a dictionary with integer keys to a switch statement."""
def lookup(query):
if we_are_translated():
for key, value in unrolling_iteritems:
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/optimize.py Fri Jul 24 17:26:29 2009
@@ -1,3 +1,4 @@
+"""codegen helpers and AST constant folding."""
import sys
import itertools
@@ -19,9 +20,11 @@
class __extend__(ast.AST):
def as_constant_truth(self, space):
+ """Return the truth of this node if known."""
raise AssertionError("only for expressions")
def as_constant(self):
+ """Return the value of this node as a wrapped constant if possible."""
raise AssertionError("only for expressions")
def accept_jump_if(self, gen, condition, target):
@@ -149,6 +152,7 @@
class OptimizingVisitor(ast.ASTVisitor):
+ """Constant folds AST."""
def __init__(self, space, compile_info):
self.space = space
@@ -163,6 +167,9 @@
right = binop.right.as_constant()
if right is not None:
op = binop.op
+ # Can't fold straight division without "from __future__ import
+ # division" because it might be affected at runtime by the -Q
+ # flag.
if op == ast.Div and \
not self.compile_info.flags & consts.CO_FUTURE_DIVISION:
return binop
@@ -173,9 +180,12 @@
break
else:
raise AssertionError("unknown binary operation")
+ # Let all errors be found at runtime.
except OperationError:
pass
else:
+ # To avoid blowing up the size of pyc files, we only fold
+ # reasonably sized sequences.
try:
w_len = self.space.len(w_const)
except OperationError:
@@ -244,12 +254,15 @@
return rep
def visit_Name(self, name):
+ # Turn loading None into a constant lookup. Eventually, we can do this
+ # for True and False, too.
if name.id == "None":
assert name.ctx == ast.Load
return ast.Const(self.space.w_None, name.lineno, name.col_offset)
return name
def visit_Tuple(self, tup):
+ """Try to turn tuple building into a constant."""
if tup.elts:
consts_w = [None]*len(tup.elts)
for i in range(len(tup.elts)):
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/symtable.py Fri Jul 24 17:26:29 2009
@@ -44,16 +44,22 @@
self.nested = False
def lookup(self, name):
+ """Find the scope of identifier 'name'."""
return self.symbols.get(self.mangle(name), SCOPE_UNKNOWN)
def lookup_role(self, name):
return self.roles.get(self.mangle(name), SYM_BLANK)
def new_temporary_name(self):
+ """Return the next temporary name.
+
+ This must be in sync with PythonCodeGenerator's counter.
+ """
self.note_symbol("_[%d]" % (self.temp_name_counter,), SYM_ASSIGNED)
self.temp_name_counter += 1
def note_symbol(self, identifier, role):
+ """Record that identifier occurs in this scope."""
mangled = self.mangle(identifier)
new_role = role
if mangled in self.roles:
@@ -69,17 +75,21 @@
return mangled
def note_yield(self, yield_node):
+ """Called when a yield is found."""
raise SyntaxError("'yield' outside function", yield_node.lineno,
yield_node.col_offset)
def note_return(self, ret):
+ """Called when a return statement is found."""
raise SyntaxError("return outside function", ret.lineno,
ret.col_offset)
def note_exec(self, exc):
+ """Called when an exec statement is found."""
self.has_exec = True
def note_import_star(self, imp):
+ """Called when a star import is found."""
pass
def mangle(self, name):
@@ -89,10 +99,12 @@
return name
def add_child(self, child_scope):
+ """Note a new child scope."""
child_scope.parent = self
self.children.append(child_scope)
def _finalize_name(self, name, flags, local, bound, free, globs):
+ """Decide on the scope of a name."""
if flags & SYM_GLOBAL:
if flags & SYM_PARAM:
err = "name '%s' is both local and global" % (name,)
@@ -124,11 +136,13 @@
self.symbols[name] = SCOPE_GLOBAL_IMPLICIT
def _pass_on_bindings(self, local, bound, globs, new_bound, new_globs):
+ """Allow child scopes to see names bound here and in outer scopes."""
new_globs.update(globs)
if bound:
new_bound.update(bound)
def _finalize_cells(self, free):
+ """Hook for FunctionScope."""
pass
def _check_optimization(self):
@@ -137,6 +151,7 @@
_hide_bound_from_nested_scopes = False
def finalize(self, bound, free, globs):
+ """Enter final bookkeeping data into self.symbols."""
self.symbols = {}
local = {}
new_globs = {}
@@ -150,6 +165,8 @@
self._pass_on_bindings(local, bound, globs, new_bound, new_globs)
child_frees = {}
for child in self.children:
+ # Symbol dictionaries are copied to avoid having child scopes
+ # pollute each other's.
child_free = new_free.copy()
child.finalize(new_bound.copy(), child_free, new_globs.copy())
child_frees.update(child_free)
@@ -279,6 +296,7 @@
class SymtableBuilder(ast.GenericASTVisitor):
+ """Find symbol information from AST."""
def __init__(self, space, module, compile_info):
self.space = space
@@ -300,6 +318,7 @@
assert not self.stack
def push_scope(self, scope, node):
+ """Push a child scope."""
if self.stack:
self.stack[-1].add_child(scope)
self.stack.append(scope)
@@ -315,13 +334,16 @@
self.scope = None
def find_scope(self, scope_node):
+ """Lookup the scope for a given AST node."""
return self.scopes[scope_node]
def implicit_arg(self, pos):
+ """Note an implicit arg for implicit tuple unpacking."""
name = ".%d" % (pos,)
self.note_symbol(name, SYM_PARAM)
def note_symbol(self, identifier, role):
+ """Note the identifier on the current scope."""
mangled = self.scope.note_symbol(identifier, role)
if role & SYM_GLOBAL:
if mangled in self.globs:
@@ -330,6 +352,7 @@
def visit_FunctionDef(self, func):
self.note_symbol(func.name, SYM_ASSIGNED)
+ # Function defaults and decorators happen in the outer scope.
if func.args.defaults:
self.visit_sequence(func.args.defaults)
if func.decorators:
@@ -449,6 +472,8 @@
if isinstance(arg, ast.Name):
self.note_symbol(arg.id, SYM_PARAM)
elif isinstance(arg, ast.Tuple):
+ # Tuple unpacking in the argument list. Add a secret variable
+ # name to receive the tuple with.
if is_toplevel:
self.implicit_arg(i)
else:
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py Fri Jul 24 17:26:29 2009
@@ -89,6 +89,10 @@
self.stack = None
def prepare(self, start=-1):
+ """Setup the parser for parsing.
+
+ Takes the starting symbol as an argument.
+ """
if start == -1:
start = self.grammar.start
self.root = None
@@ -106,28 +110,37 @@
for i, next_state in arcs:
sym_id = self.grammar.labels[i]
if label_index == i:
+ # We matched a token (terminal).
self.shift(next_state, token_type, value, lineno, column)
state = states[next_state]
+ # While the only possible action is to accept, pop nodes off
+ # the stack.
while state[1] and not state[0]:
self.pop()
if not self.stack:
+ # Parsing is done.
return True
dfa, state_index, node = self.stack[-1]
state = dfa[0][state_index]
return False
elif sym_id >= 256:
sub_node_dfa = self.grammar.dfas[sym_id]
+ # Check if this token can start a child node.
if label_index in sub_node_dfa[1]:
self.push(sub_node_dfa, next_state, sym_id, lineno,
column)
break
else:
+ # We failed to find any arcs to another state, so unless this
+ # state is accepting, it's invalid input.
if is_accepting:
self.pop()
if not self.stack:
raise ParseError("too much input", token_type, value,
lineno, column, line)
else:
+ # If only one possible input would satisfy, attach it to the
+ # error.
if len(arcs) == 1:
expected = sym_id
else:
@@ -136,6 +149,7 @@
column, line, expected)
def classify(self, token_type, value, lineno, column, line):
+ """Find the label for a token."""
if token_type == self.grammar.KEYWORD_TOKEN:
label_index = self.grammar.keyword_ids.get(value, -1)
if label_index != -1:
@@ -147,18 +161,21 @@
return label_index
def shift(self, next_state, token_type, value, lineno, column):
+ """Shift a token and prepare for the next state."""
dfa, state, node = self.stack[-1]
new_node = Node(token_type, value, None, lineno, column)
node.children.append(new_node)
self.stack[-1] = (dfa, next_state, node)
def push(self, next_dfa, next_state, node_type, lineno, column):
+ """Push a non-terminal and adjust the current state."""
dfa, state, node = self.stack[-1]
new_node = Node(node_type, None, [], lineno, column)
self.stack[-1] = (dfa, next_state, node)
self.stack.append((next_dfa, 0, new_node))
def pop(self):
+ """Pop an entry off the stack and make its node a child of the last."""
dfa, state, node = self.stack.pop()
if self.stack:
self.stack[-1][2].children.append(node)
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py Fri Jul 24 17:26:29 2009
@@ -57,6 +57,15 @@
class CompileInfo(object):
+ """Stores information about the source being compiled.
+
+ * filename: The filename of the source.
+ * mode: The parse mode to use. ('exec', 'eval', or 'single')
+ * flags: Parser and compiler flags.
+ * encoding: The source encoding.
+ * last_future_import: The line number and offset of the last __future__
+ import.
+ """
def __init__(self, filename, mode="exec", flags=0, future_pos=(0, 0)):
self.filename = filename
@@ -79,7 +88,11 @@
self.space = space
def parse_source(self, textsrc, compile_info):
- """Parse a python source according to goal"""
+ """Main entry point for parsing Python source.
+
+ Everything from decoding the source to tokenizing to building the parse
+ tree is handled here.
+ """
# Detect source encoding.
enc = None
if textsrc.startswith("\xEF\xBB\xBF"):
@@ -111,10 +124,18 @@
raise
flags = compile_info.flags
+
+ # In order to not raise errors when 'as' or 'with' are used as names in
+ # code that does not explicitly enable the with statement, we have two
+ # grammars. One with 'as' and 'with' as keywords and one without.
+ # This is far better than CPython, where the parser is hacked up to
+ # check for __future__ imports and recognize new keywords accordingly.
if flags & consts.CO_FUTURE_WITH_STATEMENT:
self.grammar = pygram.python_grammar
else:
self.grammar = pygram.python_grammar_no_with_statement
+
+ # The tokenizer is very picky about how it wants its input.
source_lines = textsrc.splitlines(True)
if source_lines and not source_lines[-1].endswith("\n"):
source_lines[-1] += '\n'
@@ -129,6 +150,8 @@
if self.add_token(tp, value, lineno, column, line):
break
except parser.ParseError, e:
+ # Catch parse errors, pretty them up and reraise them as a
+ # SyntaxError.
new_err = error.IndentationError
if tp == pygram.tokens.INDENT:
msg = "unexpected indent"
@@ -142,6 +165,7 @@
else:
tree = self.root
finally:
+ # Avoid hanging onto the tree.
self.root = None
if enc is not None:
compile_info.encoding = enc
Modified: pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/test/test_compiler.py Fri Jul 24 17:26:29 2009
@@ -744,6 +744,7 @@
def test_none_constant(self):
import opcode
co = compile("def f(): return None", "<test>", "exec").co_consts[0]
+ assert "None" not in co.co_names
co = co.co_code
op = ord(co[0]) + (ord(co[1]) << 8)
assert op == opcode.opmap["LOAD_CONST"]
More information about the Pypy-commit
mailing list