[pypy-commit] pypy py3.6: merge py3.6-wordcode
cfbolz
pypy.commits at gmail.com
Sat Jul 28 09:23:52 EDT 2018
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: py3.6
Changeset: r94914:2e5eb984c7fa
Date: 2018-07-28 15:22 +0200
http://bitbucket.org/pypy/pypy/changeset/2e5eb984c7fa/
Log: merge py3.6-wordcode
switch the py3.6 branch over to the new wordcode format introduced in
CPython 3.6.
diff too long, truncating to 2000 out of 2009 lines
diff --git a/lib-python/3/opcode.py b/lib-python/3/opcode.py
--- a/lib-python/3/opcode.py
+++ b/lib-python/3/opcode.py
@@ -216,6 +216,7 @@
def_op('BUILD_SET_UNPACK', 153)
def_op('FORMAT_VALUE', 155) # in CPython 3.6, but available in PyPy from 3.5
+def_op('BUILD_CONST_KEY_MAP', 156)
def_op('BUILD_STRING', 157) # in CPython 3.6, but available in PyPy from 3.5
# pypy modification, experimental bytecode
diff --git a/lib-python/3/test/test_opcodes.py b/lib-python/3/test/test_opcodes.py
--- a/lib-python/3/test/test_opcodes.py
+++ b/lib-python/3/test/test_opcodes.py
@@ -27,7 +27,9 @@
with open(ann_module.__file__) as f:
txt = f.read()
co = compile(txt, ann_module.__file__, 'exec')
- self.assertEqual(co.co_firstlineno, 6)
+ # On PyPy, the lineno of multiline tokens is the *first* line, on
+ # CPython the last (CPython expects 6 here)
+ self.assertEqual(co.co_firstlineno, 3)
except OSError:
pass
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -21,6 +21,8 @@
def __init__(self, opcode, arg=0):
self.opcode = opcode
self.arg = arg
+ if opcode < ops.HAVE_ARGUMENT:
+ assert arg == 0
self.lineno = 0
self.has_jump = False
@@ -28,9 +30,33 @@
"""Return the size of bytes of this instruction when it is
encoded.
"""
- if self.opcode >= ops.HAVE_ARGUMENT:
- return (6 if self.arg > 0xFFFF else 3)
- return 1
+ if self.arg <= 0xff:
+ return 2
+ if self.arg <= 0xffff:
+ return 4
+ if self.arg <= 0xffffff:
+ return 6
+ return 8
+
+ def encode(self, code):
+ opcode = self.opcode
+
+ arg = self.arg
+ size = self.size()
+ if size == 8:
+ code.append(chr(ops.EXTENDED_ARG))
+ code.append(chr((arg >> 24) & 0xff))
+ assert ((arg >> 24) & 0xff) == (arg >> 24)
+ if size >= 6:
+ code.append(chr(ops.EXTENDED_ARG))
+ code.append(chr((arg >> 16) & 0xff))
+ if size >= 4:
+ code.append(chr(ops.EXTENDED_ARG))
+ code.append(chr((arg >> 8) & 0xff))
+ if size >= 2:
+ code.append(chr(opcode))
+ code.append(chr(arg & 0xff))
+
def jump_to(self, target, absolute=False):
"""Indicate the target this jump instruction.
@@ -121,20 +147,9 @@
"""Encode the instructions in this block into bytecode."""
code = []
for instr in self.instructions:
- opcode = instr.opcode
- if opcode >= ops.HAVE_ARGUMENT:
- arg = instr.arg
- if instr.arg > 0xFFFF:
- ext = arg >> 16
- code.append(chr(ops.EXTENDED_ARG))
- code.append(chr(ext & 0xFF))
- code.append(chr(ext >> 8))
- arg &= 0xFFFF
- code.append(chr(opcode))
- code.append(chr(arg & 0xFF))
- code.append(chr(arg >> 8))
- else:
- code.append(chr(opcode))
+ instr.encode(code)
+ assert len(code) == self.code_size()
+ assert len(code) & 1 == 0
return ''.join(code)
@@ -185,6 +200,13 @@
self.lineno = 0
self.add_none_to_final_return = True
+ def _check_consistency(self, blocks):
+ current_off = 0
+ for block in blocks:
+ assert block.offset == current_off
+ for instr in block.instructions:
+ current_off += instr.size()
+
def new_block(self):
return Block()
@@ -315,14 +337,12 @@
def _resolve_block_targets(self, blocks):
"""Compute the arguments of jump instructions."""
- last_extended_arg_count = 0
# The reason for this loop is extended jumps. EXTENDED_ARG
- # extends the bytecode size, so it might invalidate the offsets
- # we've already given. Thus we have to loop until the number of
- # extended args is stable. Any extended jump at all is
- # extremely rare, so performance is not too concerning.
+ # extends the bytecode size, so it might invalidate the offsets we've
+ # already given. Thus we have to loop until the size of all jump
+ # instructions is stable. Any extended jump at all is extremely rare,
+ # so performance should not be too concerning.
while True:
- extended_arg_count = 0
offset = 0
force_redo = False
# Calculate the code offset of each block.
@@ -332,7 +352,8 @@
for block in blocks:
offset = block.offset
for instr in block.instructions:
- offset += instr.size()
+ size = instr.size()
+ offset += size
if instr.has_jump:
target, absolute = instr.jump
op = instr.opcode
@@ -351,22 +372,21 @@
instr.opcode = ops.RETURN_VALUE
instr.arg = 0
instr.has_jump = False
- # The size of the code changed,
+ # The size of the code may have changed,
# we have to trigger another pass
- force_redo = True
+ if instr.size() != size:
+ force_redo = True
continue
if absolute:
jump_arg = target.offset
else:
jump_arg = target.offset - offset
instr.arg = jump_arg
- if jump_arg > 0xFFFF:
- extended_arg_count += 1
- if (extended_arg_count == last_extended_arg_count and
- not force_redo):
- break
- else:
- last_extended_arg_count = extended_arg_count
+ if instr.size() != size:
+ force_redo = True
+ if not force_redo:
+ self._check_consistency(blocks)
+ return
def _build_consts_array(self):
"""Turn the applevel constants dictionary into a list."""
@@ -738,7 +758,7 @@
return -2 - _num_args(arg) - ((arg >> 16) & 0xFFFF)
def _compute_MAKE_FUNCTION(arg):
- return -1 - _num_args(arg) - ((arg >> 16) & 0xFFFF)
+ return -1 - bool(arg & 0x01) - bool(arg & 0x02) - bool(arg & 0x04) - bool(arg & 0x08)
def _compute_BUILD_SLICE(arg):
if arg == 3:
@@ -775,6 +795,9 @@
def _compute_BUILD_STRING(arg):
return 1 - arg
+def _compute_BUILD_CONST_KEY_MAP(arg):
+ return -arg
+
_stack_effect_computers = {}
for name, func in globals().items():
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -307,7 +307,7 @@
# if the scope contained an annotated variable assignemt,
# this will emit the requisite SETUP_ANNOTATIONS
if self.scope.contains_annotated and not isinstance(self, AbstractFunctionCodeGenerator):
- self.emit_op(ops.SETUP_ANNOTATIONS)
+ return self.emit_op(ops.SETUP_ANNOTATIONS)
def visit_Module(self, mod):
if not self._handle_body(mod.body):
@@ -321,10 +321,11 @@
self.add_none_to_final_return = False
mod.body.walkabout(self)
- def _make_function(self, code, num_defaults=0, qualname=None):
+ def _make_function(self, code, oparg=0, qualname=None):
"""Emit the opcodes to turn a code object into a function."""
w_qualname = self.space.newtext(qualname or code.co_name)
if code.co_freevars:
+ oparg = oparg | 0x08
# Load cell and free vars to pass on.
for free in code.co_freevars:
free_scope = self.scope.lookup(free)
@@ -335,24 +336,25 @@
index = self.free_vars[free]
self.emit_op_arg(ops.LOAD_CLOSURE, index)
self.emit_op_arg(ops.BUILD_TUPLE, len(code.co_freevars))
- self.load_const(code)
- self.load_const(w_qualname)
- self.emit_op_arg(ops.MAKE_CLOSURE, num_defaults)
- else:
- self.load_const(code)
- self.load_const(w_qualname)
- self.emit_op_arg(ops.MAKE_FUNCTION, num_defaults)
+ self.load_const(code)
+ self.load_const(w_qualname)
+ self.emit_op_arg(ops.MAKE_FUNCTION, oparg)
def _visit_kwonlydefaults(self, args):
defaults = 0
+ keys_w = []
for i, default in enumerate(args.kw_defaults):
if default:
kwonly = args.kwonlyargs[i]
assert isinstance(kwonly, ast.arg)
mangled = self.scope.mangle(kwonly.arg)
- self.load_const(self.space.newtext(mangled))
+ keys_w.append(self.space.newtext(mangled))
default.walkabout(self)
defaults += 1
+ if keys_w:
+ w_tup = self.space.newtuple(keys_w)
+ self.load_const(w_tup)
+ self.emit_op_arg(ops.BUILD_CONST_KEY_MAP, len(keys_w))
return defaults
def _visit_arg_annotation(self, name, ann, names):
@@ -387,7 +389,7 @@
self.error("too many annotations", func)
w_tup = space.newtuple([space.newtext(name) for name in names])
self.load_const(w_tup)
- l += 1
+ self.emit_op_arg(ops.BUILD_CONST_KEY_MAP, l)
return l
@specialize.arg(2)
@@ -396,16 +398,25 @@
# Load decorators first, but apply them after the function is created.
self.visit_sequence(func.decorator_list)
args = func.args
+
assert isinstance(args, ast.arguments)
+
+ oparg = 0
self.visit_sequence(args.defaults)
- kw_default_count = 0
+
+ if args.defaults is not None and len(args.defaults):
+ oparg = oparg | 0x01
+ self.emit_op_arg(ops.BUILD_TUPLE, len(args.defaults))
+
if args.kwonlyargs:
kw_default_count = self._visit_kwonlydefaults(args)
+ if kw_default_count:
+ oparg = oparg | 0x02
+
num_annotations = self._visit_annotations(func, args, func.returns)
- num_defaults = len(args.defaults) if args.defaults is not None else 0
- oparg = num_defaults
- oparg |= kw_default_count << 8
- oparg |= num_annotations << 16
+ if num_annotations:
+ oparg = oparg | 0x04
+
code, qualname = self.sub_scope(function_code_generator, func.name,
func, func.lineno)
self._make_function(code, oparg, qualname=qualname)
@@ -425,15 +436,20 @@
self.update_position(lam.lineno)
args = lam.args
assert isinstance(args, ast.arguments)
+
self.visit_sequence(args.defaults)
- kw_default_count = 0
+
+ oparg = 0
+ if args.defaults is not None and len(args.defaults):
+ oparg = oparg | 0x01
+ self.emit_op_arg(ops.BUILD_TUPLE, len(args.defaults))
+
if args.kwonlyargs:
kw_default_count = self._visit_kwonlydefaults(args)
- default_count = len(args.defaults) if args.defaults is not None else 0
+ if kw_default_count:
+ oparg = oparg | 0x02
code, qualname = self.sub_scope(
LambdaCodeGenerator, "<lambda>", lam, lam.lineno)
- oparg = default_count
- oparg |= kw_default_count << 8
self._make_function(code, oparg, qualname=qualname)
def visit_ClassDef(self, cls):
@@ -1281,28 +1297,46 @@
containers = 0
elements = 0
is_unpacking = False
+ all_constant_keys_w = None
if d.values:
+ if len(d.keys) < 0xffff:
+ all_constant_keys_w = []
+ for key in d.keys:
+ if key is None or key.as_constant() is None:
+ all_constant_keys_w = None
+ break
+ else:
+ all_constant_keys_w.append(key.as_constant())
for i in range(len(d.values)):
key = d.keys[i]
is_unpacking = key is None
if elements == 0xFFFF or (elements and is_unpacking):
+ assert all_constant_keys_w is None
self.emit_op_arg(ops.BUILD_MAP, elements)
containers += 1
elements = 0
if is_unpacking:
+ assert all_constant_keys_w is None
d.values[i].walkabout(self)
containers += 1
else:
- key.walkabout(self)
+ if not all_constant_keys_w:
+ key.walkabout(self)
d.values[i].walkabout(self)
elements += 1
if elements or containers == 0:
- self.emit_op_arg(ops.BUILD_MAP, elements)
- containers += 1
+ if all_constant_keys_w:
+ w_tup = self.space.newtuple(all_constant_keys_w)
+ self.load_const(w_tup)
+ self.emit_op_arg(ops.BUILD_CONST_KEY_MAP, elements)
+ else:
+ self.emit_op_arg(ops.BUILD_MAP, elements)
+ containers += 1
# If there is more than one dict, they need to be merged into
# a new dict. If there is one dict and it's an unpacking, then
#it needs to be copied into a new dict.
while containers > 1 or is_unpacking:
+ assert all_constant_keys_w is None
oparg = min(containers, 255)
self.emit_op_arg(ops.BUILD_MAP_UNPACK, oparg)
containers -= (oparg - 1)
@@ -1699,6 +1733,12 @@
symbols, compile_info, qualname=None)
def _compile(self, tree):
+ if isinstance(tree, ast.Module):
+ if tree.body:
+ self.first_lineno = tree.body[0].lineno
+ else:
+ self.first_lineno = self.lineno = 1
+
self._maybe_setup_annotations()
tree.walkabout(self)
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -939,7 +939,7 @@
def test_flufl(self):
source = "x <> y"
- raises(SyntaxError, self.get_ast, source)
+ py.test.raises(SyntaxError, self.get_ast, source)
comp = self.get_first_expr(source,
flags=consts.CO_FUTURE_BARRY_AS_BDFL)
assert isinstance(comp, ast.Compare)
@@ -1167,7 +1167,7 @@
s = self.get_first_expr("b'hi' b' implicitly' b' extra'")
assert isinstance(s, ast.Bytes)
assert space.eq_w(s.s, space.newbytes("hi implicitly extra"))
- raises(SyntaxError, self.get_first_expr, "b'hello' 'world'")
+ py.test.raises(SyntaxError, self.get_first_expr, "b'hello' 'world'")
sentence = u"Die Männer ärgern sich!"
source = u"# coding: utf-7\nstuff = '%s'" % (sentence,)
info = pyparse.CompileInfo("<test>", "exec")
@@ -1362,8 +1362,8 @@
assert isinstance(if2, ast.Name)
def test_cpython_issue12983(self):
- raises(SyntaxError, self.get_ast, r"""b'\x'""")
- raises(SyntaxError, self.get_ast, r"""b'\x0'""")
+ py.test.raises(SyntaxError, self.get_ast, r"""b'\x'""")
+ py.test.raises(SyntaxError, self.get_ast, r"""b'\x0'""")
def test_matmul(self):
mod = self.get_ast("a @ b")
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -72,10 +72,9 @@
def check(self, w_dict, evalexpr, expected):
# for now, we compile evalexpr with CPython's compiler but run
# it with our own interpreter to extract the data from w_dict
- co_expr = compile(evalexpr, '<evalexpr>', 'eval')
space = self.space
- pyco_expr = PyCode._from_code(space, co_expr)
- w_res = pyco_expr.exec_host_bytecode(w_dict, w_dict)
+ pyco_expr = space.createcompiler().compile(evalexpr, '<evalexpr>', 'eval', 0)
+ w_res = space.exec_(pyco_expr, w_dict, w_dict)
res = space.str_w(space.repr(w_res))
expected_repr = self.get_py3_repr(expected)
if isinstance(expected, float):
@@ -1011,6 +1010,7 @@
("C4.__doc__", 'docstring'),
("C4.__doc__", 'docstring'),
("__doc__", None),])
+
def test_remove_docstring(self, expr, result):
source = '"module_docstring"\n' + """if 1:
def f1():
@@ -1210,12 +1210,12 @@
yield self.st, """z=f'{f"{0}"*3}'""", 'z', '000'
def test_fstring_error(self):
- raises(SyntaxError, self.run, "f'{}'")
- raises(SyntaxError, self.run, "f'{ \t }'")
- raises(SyntaxError, self.run, "f'{5#}'")
- raises(SyntaxError, self.run, "f'{5)#}'")
- raises(SyntaxError, self.run, "f'''{5)\n#}'''")
- raises(SyntaxError, self.run, "f'\\x'")
+ py.test.raises(SyntaxError, self.run, "f'{}'")
+ py.test.raises(SyntaxError, self.run, "f'{ \t }'")
+ py.test.raises(SyntaxError, self.run, "f'{5#}'")
+ py.test.raises(SyntaxError, self.run, "f'{5)#}'")
+ py.test.raises(SyntaxError, self.run, "f'''{5)\n#}'''")
+ py.test.raises(SyntaxError, self.run, "f'\\x'")
def test_fstring_encoding(self):
src = """# -*- coding: latin-1 -*-\nz=ord(f'{"\xd8"}')\n"""
@@ -1512,3 +1512,18 @@
del []
"""
generate_function_code(source, self.space)
+
+ def test_make_constant_map(self):
+ source = """def f():
+ return {"A": 1, "b": 2}
+ """
+ counts = self.count_instructions(source)
+ assert ops.BUILD_MAP not in counts
+ source = """def f():
+ return {"a": 1, "b": {}, 1: {"a": x}}
+ """
+ counts = self.count_instructions(source)
+ assert counts[ops.BUILD_MAP] == 1 # the empty dict
+ assert counts[ops.BUILD_CONST_KEY_MAP] == 2
+
+
diff --git a/pypy/interpreter/astcompiler/test/test_misc.py b/pypy/interpreter/astcompiler/test/test_misc.py
--- a/pypy/interpreter/astcompiler/test/test_misc.py
+++ b/pypy/interpreter/astcompiler/test/test_misc.py
@@ -1,4 +1,5 @@
from pypy.interpreter.astcompiler.misc import mangle
+from pypy.interpreter.astcompiler.assemble import Instruction, ops
def test_mangle():
assert mangle("foo", "Bar") == "foo"
@@ -13,6 +14,34 @@
assert mangle("__foo", "___") == "__foo"
assert mangle("___foo", "__Bar") == "_Bar___foo"
+def test_instruction_size():
+ assert Instruction(ops.POP_TOP).size() == 2
+ assert Instruction(ops.LOAD_FAST, 23).size() == 2
+ assert Instruction(ops.LOAD_FAST, 0xfff0).size() == 4
+ assert Instruction(ops.LOAD_FAST, 0x10000).size() == 6
+ assert Instruction(ops.LOAD_FAST, 0x1000000).size() == 8
+
+def test_instruction_encode():
+ c = []
+ Instruction(ops.POP_TOP).encode(c)
+ assert c == [chr(ops.POP_TOP), '\x00']
+
+ c = []
+ Instruction(ops.LOAD_FAST, 1).encode(c)
+ assert c == [chr(ops.LOAD_FAST), '\x01']
+
+ c = []
+ Instruction(ops.LOAD_FAST, 0x201).encode(c)
+ assert c == [chr(ops.EXTENDED_ARG), '\x02', chr(ops.LOAD_FAST), '\x01']
+
+ c = []
+ Instruction(ops.LOAD_FAST, 0x30201).encode(c)
+ assert c == [chr(ops.EXTENDED_ARG), '\x03', chr(ops.EXTENDED_ARG), '\x02', chr(ops.LOAD_FAST), '\x01']
+
+ c = []
+ Instruction(ops.LOAD_FAST, 0x5030201).encode(c)
+ assert c == [chr(ops.EXTENDED_ARG), '\x05', chr(ops.EXTENDED_ARG), '\x03', chr(ops.EXTENDED_ARG), '\x02', chr(ops.LOAD_FAST), '\x01']
+
def app_test_warning_to_error_translation():
import warnings
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -166,16 +166,20 @@
# Normal case: the call above raises Yield.
# We reach this point if the iterable is exhausted.
last_instr = jit.promote(frame.last_instr)
+ assert last_instr & 1 == 0
assert last_instr >= 0
- return r_uint(last_instr + 1)
+ return r_uint(last_instr + 2)
if isinstance(w_arg_or_err, SApplicationException):
return frame.handle_generator_error(w_arg_or_err.operr)
last_instr = jit.promote(frame.last_instr)
if last_instr != -1:
+ assert last_instr & 1 == 0
frame.pushvalue(w_arg_or_err)
- return r_uint(last_instr + 1)
+ return r_uint(last_instr + 2)
+ else:
+ return r_uint(0)
def next_yield_from(self, frame, w_yf, w_inputvalue_or_err):
"""Fetch the next item of the current 'yield from', push it on
diff --git a/pypy/interpreter/interactive.py b/pypy/interpreter/interactive.py
--- a/pypy/interpreter/interactive.py
+++ b/pypy/interpreter/interactive.py
@@ -213,7 +213,7 @@
ec.bytecode_only_trace = self._orig_bytecode_only_trace
def _do_bytecode_only_trace(self, frame):
- from pypy.tool.pydis import Bytecode, HAVE_ARGUMENT
+ from pypy.tool import opcode3, dis3
if frame.hide():
return
@@ -221,18 +221,12 @@
self.unsettrace()
next_instr = frame.last_instr
opcode = ord(frame.pycode.co_code[next_instr])
+ oparg = ord(frame.pycode.co_code[next_instr+1])
- oparg = 0
- if opcode >= HAVE_ARGUMENT:
- lo = ord(frame.pycode.co_code[next_instr+1])
- hi = ord(frame.pycode.co_code[next_instr+2])
- oparg = (hi * 256) | lo
-
- class fake:
- code = frame.pycode
- bytecode = Bytecode(fake, next_instr, oparg, 0)
+ argrepr = reprargstring(self.space, frame.pycode, opcode, oparg)
+ oprepr = opcode3.opname[opcode] + argrepr.ljust(5)
print '\t%-19s %s' % (str(frame.pycode.co_name) + ':',
- bytecode.repr_with_space(self.space))
+ oprepr)
self.settrace()
def checktrace(self):
@@ -255,3 +249,26 @@
class IncompleteInput(Exception):
pass
+
+
+def reprargstring(space, pycode, opcode, oparg):
+ """ return a string representation of any arguments. (empty for no args)"""
+ from pypy.tool import opcode3
+ if oparg is None:
+ return ''
+ s = repr(oparg).rjust(5) + " "
+ if opcode in opcode3.hasconst:
+ r = space.text_w(space.repr(pycode.co_consts_w[oparg]))
+ s += '(' + r + ')'
+ elif opcode in opcode3.hasname:
+ s += '(' + pycode.co_names[oparg] + ')'
+ elif opcode in opcode3.hasjrel:
+ s += '(to ' + repr(self.index + oparg) + ')'
+ elif opcode in opcode3.haslocal:
+ s += '(' + pycode.co_varnames[oparg] + ')'
+ elif opcode in opcode3.hascompare:
+ s += '(' + opcode3.cmp_op[oparg] + ')'
+ elif opcode in opcode3.hasfree:
+ free = pycode.co_cellvars + pycode.co_freevars
+ s += '(' + free[oparg] + ')'
+ return s
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -39,7 +39,7 @@
# time you make pyc files incompatible. This value ends up in the frozen
# importlib, via MAGIC_NUMBER in module/_frozen_importlib/__init__.
-pypy_incremental_magic = 128 # bump it by 16
+pypy_incremental_magic = 160 # bump it by 16
assert pypy_incremental_magic % 16 == 0
assert pypy_incremental_magic < 3000 # the magic number of Python 3. There are
# no known magic numbers below this value
@@ -216,6 +216,7 @@
"""
Hack to initialize the code object from a real (CPython) one.
"""
+ raise TypeError("assert reinterpretation for applevel tests is broken on PyPy3!")
assert isinstance(code, types.CodeType)
newconsts_w = [None] * len(code.co_consts)
num = 0
@@ -301,11 +302,7 @@
w_co.remove_docstrings(space)
def exec_host_bytecode(self, w_globals, w_locals):
- if sys.version_info < (2, 7):
- raise Exception("PyPy no longer supports Python 2.6 or lower")
- frame = self.space.FrameClass(self.space, self, w_globals, None)
- frame.setdictscope(w_locals)
- return frame.run()
+ raise Exception("no longer supported after the switch to wordcode!")
def dump(self):
"""NOT_RPYTHON: A dis.dis() dump of the code object."""
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -691,6 +691,7 @@
endblock = [-1] # current finally/except block stack
addr = 0
while addr < len(code):
+ assert addr & 1 == 0
op = ord(code[addr])
if op in (SETUP_LOOP, SETUP_EXCEPT, SETUP_FINALLY, SETUP_WITH,
SETUP_ASYNC_WITH):
@@ -713,10 +714,7 @@
if addr == self.last_instr:
f_lasti_handler_addr = endblock[-1]
- if op >= HAVE_ARGUMENT:
- addr += 3
- else:
- addr += 1
+ addr += 2
if len(blockstack) != 0 or len(endblock) != 1:
raise oefmt(space.w_SystemError,
@@ -774,6 +772,7 @@
block.cleanupstack(self)
self.getorcreatedebug().f_lineno = new_lineno
+ assert new_lasti & 1 == 0
self.last_instr = new_lasti
def get_last_lineno(self):
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -62,6 +62,7 @@
co_code = pycode.co_code
try:
while True:
+ assert next_instr & 1 == 0
next_instr = self.handle_bytecode(co_code, next_instr, ec)
except ExitFrame:
return self.popvalue()
@@ -152,22 +153,17 @@
@jit.unroll_safe
def dispatch_bytecode(self, co_code, next_instr, ec):
while True:
+ assert next_instr & 1 == 0
self.last_instr = intmask(next_instr)
if jit.we_are_jitted():
ec.bytecode_only_trace(self)
else:
ec.bytecode_trace(self)
next_instr = r_uint(self.last_instr)
+ assert next_instr & 1 == 0
opcode = ord(co_code[next_instr])
- next_instr += 1
-
- if opcode >= HAVE_ARGUMENT:
- lo = ord(co_code[next_instr])
- hi = ord(co_code[next_instr+1])
- next_instr += 2
- oparg = (hi * 256) | lo
- else:
- oparg = 0
+ oparg = ord(co_code[next_instr + 1])
+ next_instr += 2
# note: the structure of the code here is such that it makes
# (after translation) a big "if/elif" chain, which is then
@@ -175,12 +171,11 @@
while opcode == opcodedesc.EXTENDED_ARG.index:
opcode = ord(co_code[next_instr])
+ arg = ord(co_code[next_instr + 1])
if opcode < HAVE_ARGUMENT:
raise BytecodeCorruption
- lo = ord(co_code[next_instr+1])
- hi = ord(co_code[next_instr+2])
- next_instr += 3
- oparg = (oparg * 65536) | (hi * 256) | lo
+ next_instr += 2
+ oparg = (oparg * 256) | arg
if opcode == opcodedesc.RETURN_VALUE.index:
w_returnvalue = self.popvalue()
@@ -252,6 +247,8 @@
self.BINARY_TRUE_DIVIDE(oparg, next_instr)
elif opcode == opcodedesc.BINARY_XOR.index:
self.BINARY_XOR(oparg, next_instr)
+ elif opcode == opcodedesc.BUILD_CONST_KEY_MAP.index:
+ self.BUILD_CONST_KEY_MAP(oparg, next_instr)
elif opcode == opcodedesc.BUILD_LIST.index:
self.BUILD_LIST(oparg, next_instr)
elif opcode == opcodedesc.BUILD_LIST_FROM_ARG.index:
@@ -362,8 +359,6 @@
self.LOAD_NAME(oparg, next_instr)
elif opcode == opcodedesc.LOOKUP_METHOD.index:
self.LOOKUP_METHOD(oparg, next_instr)
- elif opcode == opcodedesc.MAKE_CLOSURE.index:
- self.MAKE_CLOSURE(oparg, next_instr)
elif opcode == opcodedesc.MAKE_FUNCTION.index:
self.MAKE_FUNCTION(oparg, next_instr)
elif opcode == opcodedesc.MAP_ADD.index:
@@ -1357,47 +1352,42 @@
self.call_function(oparg, w_varkw, has_vararg=True)
@jit.unroll_safe
- def _make_function(self, oparg, freevars=None):
+ def MAKE_FUNCTION(self, oparg, next_instr):
space = self.space
w_qualname = self.popvalue()
qualname = self.space.unicode_w(w_qualname)
w_codeobj = self.popvalue()
codeobj = self.space.interp_w(PyCode, w_codeobj)
- if freevars is not None:
- # Pop freevars
- self.popvalue()
- posdefaults = oparg & 0xFF
- kwdefaults = (oparg >> 8) & 0xFF
- num_annotations = (oparg >> 16) & 0xFF
- w_ann = None
- if num_annotations:
- names_w = space.fixedview(self.popvalue())
- w_ann = space.newdict(strdict=True)
- for i in range(len(names_w) - 1, -1, -1):
- space.setitem(w_ann, names_w[i], self.popvalue())
- kw_defs_w = None
- if kwdefaults:
- kw_defs_w = []
- for i in range(kwdefaults):
- w_defvalue = self.popvalue()
- w_defname = self.popvalue()
- kw_defs_w.append((w_defname, w_defvalue))
- defaultarguments = self.popvalues(posdefaults)
+ assert 0 <= oparg <= 0x0F
+ if oparg & 0x08:
+ w_freevarstuple = self.popvalue()
+ # XXX this list copy is expensive, it's purely for the annotator
+ freevars = [self.space.interp_w(Cell, cell)
+ for cell in self.space.fixedview(w_freevarstuple)]
+ else:
+ freevars = None
+ if oparg & 0x04:
+ w_ann = self.popvalue()
+ else:
+ w_ann = None
+ if oparg & 0x02:
+ w_kw_defs = self.popvalue()
+ # XXX
+ kw_defs_w = [space.unpackiterable(w_tup)
+ for w_tup in space.fixedview(
+ space.call_method(w_kw_defs, 'items'))]
+ else:
+ kw_defs_w = None
+ if oparg & 0x01:
+ defaultarguments = space.fixedview(self.popvalue())
+ else:
+ defaultarguments = []
+
fn = function.Function(space, codeobj, self.get_w_globals(),
defaultarguments,
kw_defs_w, freevars, w_ann, qualname=qualname)
self.pushvalue(fn)
- def MAKE_FUNCTION(self, oparg, next_instr):
- return self._make_function(oparg)
-
- @jit.unroll_safe
- def MAKE_CLOSURE(self, oparg, next_instr):
- w_freevarstuple = self.peekvalue(2)
- freevars = [self.space.interp_w(Cell, cell)
- for cell in self.space.fixedview(w_freevarstuple)]
- self._make_function(oparg, freevars)
-
def BUILD_SLICE(self, numargs, next_instr):
if numargs == 3:
w_step = self.popvalue()
@@ -1447,7 +1437,18 @@
w_value = self.peekvalue(2 * i)
w_key = self.peekvalue(2 * i + 1)
self.space.setitem(w_dict, w_key, w_value)
- self.popvalues(2 * itemcount)
+ self.dropvalues(2 * itemcount)
+ self.pushvalue(w_dict)
+
+ @jit.unroll_safe
+ def BUILD_CONST_KEY_MAP(self, itemcount, next_instr):
+ keys_w = self.space.fixedview(self.popvalue())
+ w_dict = self.space.newdict()
+ for i in range(itemcount):
+ w_value = self.peekvalue(itemcount - 1 - i)
+ w_key = keys_w[i]
+ self.space.setitem(w_dict, w_key, w_value)
+ self.dropvalues(itemcount)
self.pushvalue(w_dict)
@jit.unroll_safe
@@ -1456,7 +1457,7 @@
for i in range(itemcount-1, -1, -1):
w_item = self.peekvalue(i)
self.space.call_method(w_set, 'add', w_item)
- self.popvalues(itemcount)
+ self.dropvalues(itemcount)
self.pushvalue(w_set)
@jit.unroll_safe
diff --git a/pypy/interpreter/test/test_annotations.py b/pypy/interpreter/test/test_annotations.py
--- a/pypy/interpreter/test/test_annotations.py
+++ b/pypy/interpreter/test/test_annotations.py
@@ -132,3 +132,11 @@
''')
""")
+ def test_lineno(self):
+ s = """
+
+a: int
+ """
+ c = compile(s, "f", "exec")
+ assert c.co_firstlineno == 3
+
diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py
--- a/pypy/interpreter/test/test_function.py
+++ b/pypy/interpreter/test/test_function.py
@@ -643,11 +643,20 @@
class TestMethod:
- def setup_method(self, method):
- def c(self, bar):
- return bar
- code = PyCode._from_code(self.space, c.__code__)
- self.fn = Function(self.space, code, self.space.newdict())
+ @classmethod
+ def compile(cls, src):
+ assert src.strip().startswith("def ")
+ compiler = cls.space.createcompiler()
+ code = compiler.compile(src, '<hello>', 'exec', 0).co_consts_w[0]
+ return Function(cls.space, code, cls.space.newdict())
+
+ def setup_class(cls):
+ src = """
+def c(self, bar):
+ return bar
+ """
+ cls.fn = cls.compile(src)
+
def test_get(self):
space = self.space
@@ -672,9 +681,7 @@
def test_method_get(self):
space = self.space
# Create some function for this test only
- def m(self): return self
- func = Function(space, PyCode._from_code(self.space, m.__code__),
- space.newdict())
+ func = self.compile("def m(self): return self")
# Some shorthands
obj1 = space.wrap(23)
obj2 = space.wrap(42)
@@ -696,6 +703,11 @@
assert meth3 is func
class TestShortcuts(object):
+ def compile(self, src):
+ assert src.strip().startswith("def ")
+ compiler = self.space.createcompiler()
+ code = compiler.compile(src, '<hello>', 'exec', 0).co_consts_w[0]
+ return Function(self.space, code, self.space.newdict())
def test_call_function(self):
space = self.space
@@ -703,14 +715,15 @@
d = {}
for i in range(10):
args = "(" + ''.join(["a%d," % a for a in range(i)]) + ")"
- exec """
+ src = """
def f%s:
return %s
-""" % (args, args) in d
+""" % (args, args)
+ exec src in d
f = d['f']
res = f(*range(i))
- code = PyCode._from_code(self.space, f.__code__)
- fn = Function(self.space, code, self.space.newdict())
+ fn = self.compile(src)
+ code = fn.code
assert fn.code.fast_natural_arity == i|PyCode.FLATPYCALL
if i < 5:
@@ -729,18 +742,18 @@
def test_flatcall(self):
space = self.space
- def f(a):
- return a
- code = PyCode._from_code(self.space, f.__code__)
- fn = Function(self.space, code, self.space.newdict())
+ src = """
+def f(a):
+ return a"""
+ fn = self.compile(src)
assert fn.code.fast_natural_arity == 1|PyCode.FLATPYCALL
def bomb(*args):
assert False, "shortcutting should have avoided this"
- code.funcrun = bomb
- code.funcrun_obj = bomb
+ fn.code.funcrun = bomb
+ fn.code.funcrun_obj = bomb
w_3 = space.newint(3)
w_res = space.call_function(fn, w_3)
@@ -756,18 +769,19 @@
def test_flatcall_method(self):
space = self.space
- def f(self, a):
- return a
- code = PyCode._from_code(self.space, f.__code__)
- fn = Function(self.space, code, self.space.newdict())
+ src = """
+def f(self, a):
+ return a
+"""
+ fn = self.compile(src)
assert fn.code.fast_natural_arity == 2|PyCode.FLATPYCALL
def bomb(*args):
assert False, "shortcutting should have avoided this"
- code.funcrun = bomb
- code.funcrun_obj = bomb
+ fn.code.funcrun = bomb
+ fn.code.funcrun_obj = bomb
w_3 = space.newint(3)
w_res = space.appexec([fn, w_3], """(f, x):
@@ -784,9 +798,11 @@
def test_flatcall_default_arg(self):
space = self.space
- def f(a, b):
- return a+b
- code = PyCode._from_code(self.space, f.__code__)
+ src = """
+def f(a, b):
+ return a+b
+"""
+ code = self.compile(src).code
fn = Function(self.space, code, self.space.newdict(),
defs_w=[space.newint(1)])
@@ -813,9 +829,11 @@
def test_flatcall_default_arg_method(self):
space = self.space
- def f(self, a, b):
- return a+b
- code = PyCode._from_code(self.space, f.__code__)
+ src = """
+def f(self, a, b):
+ return a+b
+ """
+ code = self.compile(src).code
fn = Function(self.space, code, self.space.newdict(),
defs_w=[space.newint(1)])
diff --git a/pypy/interpreter/test/test_pycode.py b/pypy/interpreter/test/test_pycode.py
--- a/pypy/interpreter/test/test_pycode.py
+++ b/pypy/interpreter/test/test_pycode.py
@@ -14,6 +14,6 @@
finally:
sys.stdout = stdout
print '>>>\n' + output + '\n<<<'
- assert ' 1 (7)' in output
+ assert ' 0 (7)' in output
assert ' 4 (None)' in output
- assert ' 19 RETURN_VALUE ' in output
+ assert ' 16 RETURN_VALUE' in output
diff --git a/pypy/interpreter/test/test_zpy.py b/pypy/interpreter/test/test_zpy.py
--- a/pypy/interpreter/test/test_zpy.py
+++ b/pypy/interpreter/test/test_zpy.py
@@ -122,6 +122,3 @@
# '5\n' --- this line sent to stderr
assert ('\t<module>: LOAD_NAME 0 (x)\n'
'\t<module>: PRINT_EXPR 0 \n') in output
- assert ('\t<module>: LOAD_CONST 0 (None)\n'
- '\t<module>: RETURN_VALUE 0 \n'
- '>>>> ') in output
diff --git a/pypy/module/_ast/test/test_ast.py b/pypy/module/_ast/test/test_ast.py
--- a/pypy/module/_ast/test/test_ast.py
+++ b/pypy/module/_ast/test/test_ast.py
@@ -461,7 +461,7 @@
def test_bug_null_in_objspace_type(self):
import ast
- code = ast.Expression(lineno=1, col_offset=1, body=ast.ListComp(lineno=1, col_offset=1, elt=ast.Call(lineno=1, col_offset=1, func=ast.Name(lineno=1, col_offset=1, id='str', ctx=ast.Load(lineno=1, col_offset=1)), args=[ast.Name(lineno=1, col_offset=1, id='x', ctx=ast.Load(lineno=1, col_offset=1))], keywords=[]), generators=[ast.comprehension(lineno=1, col_offset=1, target=ast.Name(lineno=1, col_offset=1, id='x', ctx=ast.Store(lineno=1, col_offset=1)), iter=ast.List(lineno=1, col_offset=1, elts=[ast.Num(lineno=1, col_offset=1, n=23)], ctx=ast.Load(lineno=1, col_offset=1, )), ifs=[])]))
+ code = ast.Expression(lineno=1, col_offset=1, body=ast.ListComp(lineno=1, col_offset=1, elt=ast.Call(lineno=1, col_offset=1, func=ast.Name(lineno=1, col_offset=1, id='str', ctx=ast.Load(lineno=1, col_offset=1)), args=[ast.Name(lineno=1, col_offset=1, id='x', ctx=ast.Load(lineno=1, col_offset=1))], keywords=[]), generators=[ast.comprehension(lineno=1, col_offset=1, target=ast.Name(lineno=1, col_offset=1, id='x', ctx=ast.Store(lineno=1, col_offset=1)), iter=ast.List(lineno=1, col_offset=1, elts=[ast.Num(lineno=1, col_offset=1, n=23)], ctx=ast.Load(lineno=1, col_offset=1, )), ifs=[], is_async=False)]))
compile(code, '<template>', 'eval')
def test_empty_yield_from(self):
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -788,7 +788,7 @@
f.write(_getlong(mtime))
if co:
# marshal the code object with the PyPy marshal impl
- pyco = PyCode._from_code(space, co)
+ pyco = space.createcompiler().compile(co, '?', 'exec', 0)
w_marshal = space.getbuiltinmodule('marshal')
w_marshaled_code = space.call_method(w_marshal, 'dumps', pyco)
marshaled_code = space.bytes_w(w_marshaled_code)
@@ -809,7 +809,7 @@
def test_read_compiled_module(self):
space = self.space
mtime = 12345
- co = compile('x = 42', '?', 'exec')
+ co = 'x = 42'
cpathname = _testfile(space, importing.get_pyc_magic(space), mtime, co)
stream = streamio.open_file_as_stream(cpathname, "rb")
try:
@@ -829,7 +829,7 @@
def test_load_compiled_module(self):
space = self.space
mtime = 12345
- co = compile('x = 42', '?', 'exec')
+ co = 'x = 42'
cpathname = _testfile(space, importing.get_pyc_magic(space), mtime, co)
w_modulename = space.wrap('somemodule')
stream = streamio.open_file_as_stream(cpathname, "rb")
@@ -854,7 +854,7 @@
def test_load_compiled_module_nopathname(self):
space = self.space
mtime = 12345
- co = compile('x = 42', '?', 'exec')
+ co = 'x = 42'
cpathname = _testfile(space, importing.get_pyc_magic(space), mtime, co)
w_modulename = space.wrap('somemodule')
stream = streamio.open_file_as_stream(cpathname, "rb")
@@ -931,7 +931,7 @@
continue
pathname = "whatever"
mtime = 12345
- co = compile('x = 42', '?', 'exec')
+ co = 'x = 42'
cpathname = _testfile(space1, importing.get_pyc_magic(space1),
mtime, co)
w_modulename = space2.wrap('somemodule')
diff --git a/pypy/tool/dis3.py b/pypy/tool/dis3.py
--- a/pypy/tool/dis3.py
+++ b/pypy/tool/dis3.py
@@ -1,20 +1,40 @@
-"""Disassembler of Python byte code into mnemonics.
-Python 3 dis.py partly backported to Python 2"""
+"""Disassembler of Python byte code into mnemonics."""
+
+from __future__ import print_function
import sys
import types
+import collections
+import io
from opcode3 import *
from opcode3 import __all__ as _opcodes_all
-__all__ = ["dis", "disassemble", "distb", "disco",
- "findlinestarts", "findlabels"] + _opcodes_all
+__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
+ "findlinestarts", "findlabels", "show_code",
+ "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all
-_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
+_have_code = (types.MethodType, types.FunctionType, types.CodeType,
+ classmethod, staticmethod, type)
+
+FORMAT_VALUE = opmap['FORMAT_VALUE']
+
+def _try_compile(source, name):
+    """Compile *source*, preferring expression mode over statement mode.
+
+    The source is first compiled in 'eval' mode; if that raises
+    SyntaxError it is compiled again in 'exec' mode.  This lets functions
+    that otherwise expect code objects accept plain strings as well.
+    *name* is handed to compile() as the filename.
+    """
+    try:
+        return compile(source, name, 'eval')
+    except SyntaxError:
+        # Not a bare expression: compile it as a sequence of statements.
+        return compile(source, name, 'exec')
def dis(x=None):
- """Disassemble classes, methods, functions, or code.
+ """Disassemble classes, methods, functions, generators, or code.
With no argument, disassemble the last traceback.
@@ -22,30 +42,31 @@
if x is None:
distb()
return
- if isinstance(x, types.InstanceType):
- x = x.__class__
- if hasattr(x, 'im_func'):
- x = x.im_func
- if hasattr(x, 'func_code'):
- x = x.func_code
- if hasattr(x, 'co_code'): # PyCode needs co_code before __dict__
+ if hasattr(x, '__func__'): # Method
+ x = x.__func__
+ if hasattr(x, '__code__'): # Function
+ x = x.__code__
+ if hasattr(x, 'gi_code'): # Generator
+ x = x.gi_code
+ if hasattr(x, 'co_code'): # Code object
disassemble(x)
- elif hasattr(x, '__dict__'):
- items = x.__dict__.items()
- items.sort()
+ elif hasattr(x, '__dict__'): # Class or module
+ items = sorted(x.__dict__.items())
for name, x1 in items:
if isinstance(x1, _have_code):
- print "Disassembly of %s:" % name
+ print("Disassembly of %s:" % name)
try:
dis(x1)
except TypeError as msg:
- print "Sorry:", msg
- print
- elif isinstance(x, str):
- disassemble_string(x)
+ print("Sorry:", msg)
+ print()
+ elif isinstance(x, (bytes, bytearray)): # Raw bytecode
+ _disassemble_bytes(x)
+ elif isinstance(x, str): # Source code
+ _disassemble_str(x)
else:
- raise TypeError("don't know how to disassemble %s objects" % \
- type(x).__name__)
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
def distb(tb=None):
"""Disassemble a traceback (default: last traceback)."""
@@ -57,103 +78,290 @@
while tb.tb_next: tb = tb.tb_next
disassemble(tb.tb_frame.f_code, tb.tb_lasti)
+# The inspect module interrogates this dictionary to build its
+# list of CO_* constants. It is also used by pretty_flags to
+# turn the co_flags field into a human readable list.
+COMPILER_FLAG_NAMES = {
+    0x001: "OPTIMIZED",
+    0x002: "NEWLOCALS",
+    0x004: "VARARGS",
+    0x008: "VARKEYWORDS",
+    0x010: "NESTED",
+    0x020: "GENERATOR",
+    0x040: "NOFREE",
+    0x080: "COROUTINE",
+    0x100: "ITERABLE_COROUTINE",
+    0x200: "ASYNC_GENERATOR",
+}
+
+def pretty_flags(flags):
+    """Return a comma-separated, human readable rendering of *flags*.
+
+    The low 32 bits are examined from least to most significant.  Each
+    set bit is reported by its name from COMPILER_FLAG_NAMES, or by its
+    hex value when it has no name.  If bits are still set after all 32
+    positions were scanned (or no bit was set at all), the remaining
+    value is appended in hex.
+    """
+    remaining = flags
+    labels = []
+    consumed_all = False
+    for shift in range(32):
+        mask = 1 << shift
+        if not remaining & mask:
+            continue
+        labels.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
+        remaining ^= mask
+        if not remaining:
+            consumed_all = True
+            break
+    if not consumed_all:
+        # Mirrors the for/else of the scan: nothing stopped it early.
+        labels.append(hex(remaining))
+    return ", ".join(labels)
+
+def _get_code_object(x):
+    """Return the code object behind *x*.
+
+    Methods, functions and generators are unwrapped through their
+    __func__, __code__ and gi_code attributes (in that order); a string
+    is compiled as source code.  Anything that does not end up exposing
+    co_code raises TypeError.
+    """
+    for attr in ('__func__', '__code__', 'gi_code'):
+        if hasattr(x, attr):
+            x = getattr(x, attr)
+    if isinstance(x, str):  # Source code
+        x = _try_compile(x, "<disassembly>")
+    if not hasattr(x, 'co_code'):
+        raise TypeError("don't know how to disassemble %s objects" %
+                        type(x).__name__)
+    return x
+
+def code_info(x):
+    """Return a formatted, multi-line summary of the code behind *x*.
+
+    *x* is resolved to a code object with _get_code_object() first.
+    """
+    code_object = _get_code_object(x)
+    return _format_code_info(code_object)
+
+def _format_code_info(co):
+    """Build the text returned by code_info() for the code object *co*.
+
+    The result starts with one line per scalar attribute (name, filename,
+    argument counts, locals, stack size, flags), followed by an indexed
+    listing for each non-empty tuple among constants, names, variable
+    names, free variables and cell variables.
+    """
+    report = [
+        "Name: %s" % co.co_name,
+        "Filename: %s" % co.co_filename,
+        "Argument count: %s" % co.co_argcount,
+        "Kw-only arguments: %s" % co.co_kwonlyargcount,
+        "Number of locals: %s" % co.co_nlocals,
+        "Stack size: %s" % co.co_stacksize,
+        "Flags: %s" % pretty_flags(co.co_flags),
+    ]
+    # (heading, entries, per-entry format); constants are shown with %r,
+    # every kind of name with %s.
+    sections = (
+        ("Constants:", co.co_consts, "%4d: %r"),
+        ("Names:", co.co_names, "%4d: %s"),
+        ("Variable names:", co.co_varnames, "%4d: %s"),
+        ("Free variables:", co.co_freevars, "%4d: %s"),
+        ("Cell variables:", co.co_cellvars, "%4d: %s"),
+    )
+    for heading, entries, entry_fmt in sections:
+        if entries:
+            report.append(heading)
+            report.extend(entry_fmt % pair for pair in enumerate(entries))
+    return "\n".join(report)
+
+def show_code(co, file=None):
+    """Print details of methods, functions, or code to *file*.
+
+    If *file* is not provided, the output is printed on stdout.
+    """
+    # file=None makes print() fall back to the current sys.stdout, so the
+    # default behaviour is unchanged while an explicit *file* is honoured.
+    print(code_info(co), file=file)
+
+_Instruction = collections.namedtuple(
+    "_Instruction",
+    ["opname", "opcode", "arg", "argval", "argrepr",
+     "offset", "starts_line", "is_jump_target"])
+
+class Instruction(_Instruction):
+    """Details for a bytecode operation
+
+       Defined fields:
+         opname - human readable name for operation
+         opcode - numeric code for operation
+         arg - numeric argument to operation (if any), otherwise None
+         argval - resolved arg value (if known), otherwise same as arg
+         argrepr - human readable description of operation argument
+         offset - start index of operation within bytecode sequence
+         starts_line - line started by this opcode (if any), otherwise None
+         is_jump_target - True if other code jumps to here, otherwise False
+    """
+
+    def _disassemble(self, lineno_width=3, mark_as_current=False):
+        """Return this instruction rendered as one line of disassembly.
+
+        *lineno_width* is the width of the line number column; 0 leaves
+        the column out entirely.
+        *mark_as_current* puts a '-->' arrow in the indicator column.
+        Trailing whitespace is stripped from the result.
+        """
+        columns = []
+        # Source line number (blank when this opcode starts no line).
+        if lineno_width:
+            if self.starts_line is None:
+                columns.append(' ' * lineno_width)
+            else:
+                columns.append(("%%%dd" % lineno_width) % self.starts_line)
+        # Current-instruction indicator, then jump-target marker.
+        columns.append('-->' if mark_as_current else ' ')
+        columns.append('>>' if self.is_jump_target else ' ')
+        # Offset from the start of the code sequence, then the opcode name.
+        columns.append(repr(self.offset).rjust(4))
+        columns.append(self.opname.ljust(20))
+        # Raw argument, followed by its description when there is one.
+        if self.arg is not None:
+            columns.append(repr(self.arg).rjust(5))
+            if self.argrepr:
+                columns.append('(' + self.argrepr + ')')
+        return ' '.join(columns).rstrip()
+
+
+def get_instructions(x, first_line=None):
+    """Iterate over the opcodes of a method, function or code object.
+
+    Yields one Instruction named tuple per operation in the code that
+    _get_code_object() resolves *x* to.
+
+    When *first_line* is given, reported line numbers are shifted so that
+    the code object's first line is reported as *first_line*; otherwise
+    the line information is taken unchanged from the code object.
+    """
+    co = _get_code_object(x)
+    shift = 0 if first_line is None else first_line - co.co_firstlineno
+    free_and_cell_names = co.co_cellvars + co.co_freevars
+    line_table = dict(findlinestarts(co))
+    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+                                   co.co_consts, free_and_cell_names,
+                                   line_table, shift)
+
+def _get_const_info(const_index, const_list):
+    """Resolve a constant reference for display.
+
+    Returns (value, repr(value)), where value is const_list[const_index]
+    when *const_list* is not None and the bare index otherwise.
+    """
+    if const_list is None:
+        value = const_index
+    else:
+        value = const_list[const_index]
+    return value, repr(value)
+
+def _get_name_info(name_index, name_list):
+    """Resolve a name reference for display.
+
+    With a name list, the looked-up name is returned as both the value
+    and its description.  Without one (None), the index itself is the
+    value and its repr() is the description.
+    """
+    if name_list is None:
+        return name_index, repr(name_index)
+    resolved = name_list[name_index]
+    return resolved, resolved
+
+
+def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
+                      cells=None, linestarts=None, line_offset=0):
+    """Iterate over the instructions in a bytecode string.
+
+    Generates a sequence of Instruction namedtuples giving the details of each
+    opcode.  Additional information about the code's runtime environment
+    (e.g. variable names, constants) can be specified using optional
+    arguments; when one of them is None the raw index is reported instead
+    of the dereferenced value.
+
+    *linestarts* maps bytecode offsets to line numbers; *line_offset* is
+    added to every line number that is reported.
+    """
+    try:
+        ascii_conv = ascii
+    except NameError:
+        # Hosts without an ascii() builtin (Python 2): fall back to repr,
+        # which is what future_builtins.ascii is documented to return there.
+        ascii_conv = repr
+    labels = findlabels(code)
+    starts_line = None
+    for offset, op, arg in _unpack_opargs(code):
+        if linestarts is not None:
+            starts_line = linestarts.get(offset, None)
+            if starts_line is not None:
+                starts_line += line_offset
+        is_jump_target = offset in labels
+        argval = None
+        argrepr = ''
+        if arg is not None:
+            #  Set argval to the dereferenced value of the argument when
+            #  available, and argrepr to the string representation of argval.
+            #    _disassemble_bytes needs the string repr of the
+            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+            argval = arg
+            if op in hasconst:
+                argval, argrepr = _get_const_info(arg, constants)
+            elif op in hasname:
+                argval, argrepr = _get_name_info(arg, names)
+            elif op in hasjrel:
+                # Relative jumps count from the end of this 2-byte instruction.
+                argval = offset + 2 + arg
+                argrepr = "to " + repr(argval)
+            elif op in haslocal:
+                argval, argrepr = _get_name_info(arg, varnames)
+            elif op in hascompare:
+                argval = cmp_op[arg]
+                argrepr = argval
+            elif op in hasfree:
+                argval, argrepr = _get_name_info(arg, cells)
+            elif op == FORMAT_VALUE:
+                # Low two bits select the conversion, bit 2 flags a format spec.
+                argval = ((None, str, repr, ascii_conv)[arg & 0x3],
+                          bool(arg & 0x4))
+                argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
+                if argval[1]:
+                    if argrepr:
+                        argrepr += ', '
+                    argrepr += 'with format'
+        yield Instruction(opname[op], op,
+                          arg, argval, argrepr,
+                          offset, starts_line, is_jump_target)
+
def disassemble(co, lasti=-1):
"""Disassemble a code object."""
- code = co.co_code
- labels = findlabels(code)
+ cell_names = co.co_cellvars + co.co_freevars
linestarts = dict(findlinestarts(co))
- n = len(code)
- i = 0
- extended_arg = 0
- free = None
- while i < n:
- c = code[i]
- op = ord(c)
- if i in linestarts:
- if i > 0:
- print
- print "%3d" % linestarts[i],
- else:
- print ' ',
+ _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts)
- if i == lasti: print '-->',
- else: print ' ',
- if i in labels: print '>>',
- else: print ' ',
- print repr(i).rjust(4),
- print opname[op].ljust(20),
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
- extended_arg = 0
- i = i+2
- if op == EXTENDED_ARG:
- extended_arg = oparg*65536L
- print repr(oparg).rjust(5),
- if op in hasconst:
- print '(' + repr(co.co_consts[oparg]) + ')',
- elif op in hasname:
- print '(' + co.co_names[oparg] + ')',
- elif op in hasjrel:
- print '(to ' + repr(i + oparg) + ')',
- elif op in haslocal:
- print '(' + co.co_varnames[oparg] + ')',
- elif op in hascompare:
- print '(' + cmp_op[oparg] + ')',
- elif op in hasfree:
- if free is None:
- free = co.co_cellvars + co.co_freevars
- print '(' + free[oparg] + ')',
- elif op in hasnargs:
- print '(%d positional, %d keyword pair)' % \
- (ord(code[i-2]), ord(code[i-1])),
- print
+def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
+                       constants=None, cells=None, linestarts=None,
+                       line_offset=0, file=None):
+    """Print a disassembly of the bytecode string *code*.
+
+    *lasti* is the offset of the instruction to mark with '-->'.
+    *varnames*, *names*, *constants*, *cells*, *linestarts* and
+    *line_offset* are forwarded to _get_instructions_bytes().
+    *file* is the stream to print to; None means the current stdout.
+    """
+    # Omit the line number column entirely if we have no line number info
+    show_lineno = linestarts is not None
+    # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
+    lineno_width = 3 if show_lineno else 0
+    for instr in _get_instructions_bytes(code, varnames, names,
+                                         constants, cells, linestarts,
+                                         line_offset=line_offset):
+        new_source_line = (show_lineno and
+                           instr.starts_line is not None and
+                           instr.offset > 0)
+        if new_source_line:
+            # Blank line between the instructions of different source lines.
+            print(file=file)
+        is_current_instr = instr.offset == lasti
+        print(instr._disassemble(lineno_width, is_current_instr), file=file)
-def disassemble_string(code, lasti=-1, varnames=None, names=None,
- constants=None):
- labels = findlabels(code)
- n = len(code)
- i = 0
- while i < n:
- c = code[i]
- op = ord(c)
- if i == lasti: print '-->',
- else: print ' ',
- if i in labels: print '>>',
- else: print ' ',
- print repr(i).rjust(4),
- print opname[op].ljust(15),
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
- i = i+2
- print repr(oparg).rjust(5),
- if op in hasconst:
- if constants:
- print '(' + repr(constants[oparg]) + ')',
- else:
- print '(%d)'%oparg,
- elif op in hasname:
- if names is not None:
- print '(' + names[oparg] + ')',
- else:
- print '(%d)'%oparg,
- elif op in hasjrel:
- print '(to ' + repr(i + oparg) + ')',
- elif op in haslocal:
- if varnames:
- print '(' + varnames[oparg] + ')',
- else:
- print '(%d)' % oparg,
- elif op in hascompare:
- print '(' + cmp_op[oparg] + ')',
- elif op in hasnargs:
- print '(%d positional, %d keyword pair)' % \
- (ord(code[i-2]), ord(code[i-1])),
- print
+def _disassemble_str(source):
+    """Disassemble the code object obtained by compiling *source*."""
+    compiled = _try_compile(source, '<dis>')
+    disassemble(compiled)
disco = disassemble # XXX For backwards compatibility
+def _unpack_opargs(code):
+    """Yield (offset, opcode, arg) for each 2-byte unit of *code*.
+
+    *arg* is None for opcodes below HAVE_ARGUMENT.  The argument of an
+    EXTENDED_ARG instruction is shifted left by 8 bits and or-ed into
+    the argument of the instruction that follows it.
+    """
+    carried_bits = 0
+    offset = 0
+    size = len(code)
+    while offset < size:
+        op = ord(code[offset])
+        if op < HAVE_ARGUMENT:
+            arg = None
+        else:
+            arg = ord(code[offset + 1]) | carried_bits
+            if op == EXTENDED_ARG:
+                carried_bits = arg << 8
+            else:
+                carried_bits = 0
+        yield (offset, op, arg)
+        offset += 2
+
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
@@ -161,23 +369,16 @@
"""
labels = []
- n = len(code)
- i = 0
- while i < n:
- c = code[i]
- op = ord(c)
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
- i = i+2
- label = -1
+ for offset, op, arg in _unpack_opargs(code):
+ if arg is not None:
if op in hasjrel:
- label = i+oparg
+ label = offset + 2 + arg
elif op in hasjabs:
- label = oparg
- if label >= 0:
- if label not in labels:
- labels.append(label)
+ label = arg
+ else:
+ continue
+ if label not in labels:
+ labels.append(label)
return labels
def findlinestarts(code):
@@ -186,13 +387,15 @@
Generate pairs (offset, lineno) as described in Python/compile.c.
"""
- byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
- line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+ byte_increments = code.co_lnotab[0::2]
+ line_increments = code.co_lnotab[1::2]
lastlineno = None
lineno = code.co_firstlineno
addr = 0
for byte_incr, line_incr in zip(byte_increments, line_increments):
+ byte_incr = ord(byte_incr)
+ line_incr = ord(line_incr)
if byte_incr:
if lineno != lastlineno:
yield (addr, lineno)
@@ -205,27 +408,77 @@
if lineno != lastlineno:
yield (addr, lineno)
+class Bytecode:
+    """The bytecode operations of a piece of code
+
+    Instantiate this with a function, method, string of code, or a code object
+    (as returned by compile()).
+
+    Iterating over this yields the bytecode operations as Instruction instances.
+    """
+    def __init__(self, x, first_line=None, current_offset=None):
+        """Resolve *x* to a code object and precompute its line table.
+
+        *first_line*, when given, is the line number to report for the
+        code object's first line.  *current_offset* is the bytecode
+        offset that dis() marks as the current instruction.
+        """
+        self.codeobj = co = _get_code_object(x)
+        if first_line is None:
+            self.first_line = co.co_firstlineno
+            self._line_offset = 0
+        else:
+            self.first_line = first_line
+            self._line_offset = first_line - co.co_firstlineno
+        self._cell_names = co.co_cellvars + co.co_freevars
+        self._linestarts = dict(findlinestarts(co))
+        self._original_object = x
+        self.current_offset = current_offset
+
+    def __iter__(self):
+        co = self.codeobj
+        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+                                       co.co_consts, self._cell_names,
+                                       self._linestarts,
+                                       line_offset=self._line_offset)
+
+    def __repr__(self):
+        return "{}({!r})".format(self.__class__.__name__,
+                                 self._original_object)
+
+    @classmethod
+    def from_traceback(cls, tb):
+        """ Construct a Bytecode from the given traceback """
+        # Walk to the innermost frame of the traceback chain.
+        while tb.tb_next:
+            tb = tb.tb_next
+        return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
+
+    def info(self):
+        """Return formatted information about the code object."""
+        return _format_code_info(self.codeobj)
+
+    def dis(self):
+        """Return a formatted view of the bytecode operations.
+
+        The text has the same layout that _disassemble_bytes() prints:
+        one line per instruction, a 3-column line number field, and an
+        empty line before every instruction (other than the one at
+        offset 0) that starts a new source line.  The instruction at
+        *current_offset*, if any, is marked as current.
+        """
+        if self.current_offset is not None:
+            current = self.current_offset
+        else:
+            current = -1
+        # The lines are assembled here rather than printed into an
+        # io.StringIO: _disassemble_bytes() takes no *file* argument, and
+        # io.StringIO only accepts unicode text on Python 2.
+        lines = []
+        for instr in self:
+            if instr.starts_line is not None and instr.offset > 0:
+                lines.append('')
+            lines.append(instr._disassemble(3, instr.offset == current))
+        return ''.join(line + '\n' for line in lines)
+
+
def _test():
"""Simple test program to disassemble a file."""
- if sys.argv[1:]:
- if sys.argv[2:]:
- sys.stderr.write("usage: python dis.py [-|file]\n")
- sys.exit(2)
- fn = sys.argv[1]
- if not fn or fn == "-":
- fn = None
- else:
- fn = None
- if fn is None:
- f = sys.stdin
- else:
- f = open(fn)
- source = f.read()
- if fn is not None:
- f.close()
- else:
- fn = "<stdin>"
- code = compile(source, fn, "exec")
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
+ args = parser.parse_args()
+ with args.infile as infile:
+ source = infile.read()
+ code = compile(source, args.infile.name, "exec")
dis(code)
if __name__ == "__main__":
diff --git a/pypy/tool/opcode3.py b/pypy/tool/opcode3.py
--- a/pypy/tool/opcode3.py
+++ b/pypy/tool/opcode3.py
@@ -2,10 +2,8 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
-"Backported" from Python 3 to Python 2 land - an excact copy of lib-python/3/opcode.py
"""
-
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"]
@@ -33,10 +31,12 @@
haslocal = []
hascompare = []
hasfree = []
-hasnargs = [] # unused
+hasnargs = []
opmap = {}
-opname = ['<%r>' % (op,) for op in range(256)]
+opname = [''] * 256
+for op in range(256): opname[op] = '<%r>' % (op,)
+del op
def def_op(name, op):
opname[op] = name
@@ -174,9 +174,11 @@
name_op('STORE_ANNOTATION', 127) # Index in name list
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
-def_op('CALL_FUNCTION', 131) # #args
-def_op('MAKE_FUNCTION', 132) # Flags
+def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
+hasnargs.append(131)
+def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
+def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
@@ -186,8 +188,12 @@
def_op('DELETE_DEREF', 138)
hasfree.append(138)
-def_op('CALL_FUNCTION_KW', 141) # #args + #kwargs
-def_op('CALL_FUNCTION_EX', 142) # Flags
+def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
+hasnargs.append(140)
+def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
+hasnargs.append(141)
+def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
+hasnargs.append(142)
jrel_op('SETUP_WITH', 143)
@@ -198,6 +204,8 @@
def_op('LOAD_CLASSDEREF', 148)
hasfree.append(148)
+jrel_op('SETUP_ASYNC_WITH', 154)
+
def_op('EXTENDED_ARG', 144)
EXTENDED_ARG = 144
@@ -207,12 +215,9 @@
def_op('BUILD_TUPLE_UNPACK', 152)
def_op('BUILD_SET_UNPACK', 153)
-jrel_op('SETUP_ASYNC_WITH', 154)
-
-def_op('FORMAT_VALUE', 155)
-def_op('BUILD_CONST_KEY_MAP', 156)
-def_op('BUILD_STRING', 157)
-def_op('BUILD_TUPLE_UNPACK_WITH_CALL', 158)
+def_op('FORMAT_VALUE', 155) # in CPython 3.6, but available in PyPy from 3.5
+def_op("BUILD_CONST_KEY_MAP", 156)
+def_op('BUILD_STRING', 157) # in CPython 3.6, but available in PyPy from 3.5
# pypy modification, experimental bytecode
def_op('LOOKUP_METHOD', 201) # Index in name list
diff --git a/pypy/tool/pydis.py b/pypy/tool/pydis.py
deleted file mode 100644
--- a/pypy/tool/pydis.py
+++ /dev/null
@@ -1,202 +0,0 @@
-"""disassembler of Python byte code into mnemonics.
-
-XXX this only works for python-2.3 because of the linenumber
- optimization
-
-"""
-
-import sys
-
-from pypy.tool import stdlib_opcode
-from pypy.tool.stdlib_opcode import *
-
-__all__ = ["dis","pydisassemble","distb","disco"] + stdlib_opcode.__all__
-
-EXTENDED_ARG = stdlib_opcode.opcodedesc.EXTENDED_ARG.index
-
-
-class Bytecode:
- def __init__(self, disresult, bytecodeindex, oparg, lineno):
- self.disresult = disresult
- self.index = bytecodeindex
- self.op = ord(disresult.code.co_code[self.index])
- self.name = opname[self.op]
- self.oparg = oparg
- self.lineno = lineno
-
- def __eq__(self, other):
- return (self.__class__ == other.__class__ and
- self.index == other.index and
- self.op == other.op and
- self.name == other.name and
- self.oparg == other.oparg)
-
- def __ne__(self, other):
- return not (self == other)
-
- def reprargstring(self, space = None):
- """ return a string representation of any arguments. (empty for no args)"""
- oparg = self.oparg
- if oparg is None:
- return ''
- co = self.disresult.code
- op = self.op
-
- s = repr(oparg).rjust(5) + " "
- if op in hasconst:
- consts = self.get_consts(space)
- s += '(' + consts[oparg] + ')'
- elif op in hasname:
- s += '(' + co.co_names[oparg] + ')'
- elif op in hasjrel:
- s += '(to ' + repr(self.index + oparg) + ')'
- elif op in haslocal:
- s += '(' + co.co_varnames[oparg] + ')'
- elif op in hascompare:
- s += '(' + cmp_op[oparg] + ')'
- elif op in hasfree:
- #if free is None:
- free = co.co_cellvars + co.co_freevars
- s += '(' + free[oparg] + ')'
- return s
-
- def get_consts(self, space=None):
- # support both real code objects and PyCode objects
- co = self.disresult.code
- if hasattr(co, "co_consts"):
- return [repr(c) for c in co.co_consts]
-
- if space is None:
- return [repr(c) for c in co.co_consts_w]
-
- r = lambda x: space.str_w(space.repr(x))
- return [r(c) for c in co.co_consts_w]
-
- def repr_with_space(self, space):
- return self.name + self.reprargstring(space)
-
- def __repr__(self):
- return self.name + self.reprargstring()
-
-class DisResult:
- """ an instance of this class gets returned for disassembling
- objects/functions/code objects whatever.
- """
- def __init__(self, code):
- self.code = code
- self.bytecodes = []
-
- def append(self, bytecodeindex, oparg, lineno):
- """ append bytecode anaylsis information ..."""
- bc = Bytecode(self, bytecodeindex, oparg, lineno)
- self.bytecodes.append(bc)
-
- def getbytecode(self, index):
- """ return bytecode instance matching the given index. """
- for bytecode in self.bytecodes:
- if bytecode.index == index:
- return bytecode
- raise ValueError("no bytecode found on index %s in code \n%s" % (
- index, pydis(self.code)))
-
- def format(self):
- lastlineno = -1
- labels = findlabels(self.code.co_code)
- lines = []
- for bc in self.bytecodes:
- l = []
- if bc.lineno != lastlineno:
- lastlineno = bc.lineno
- l.append("%3d" % bc.lineno)
- else:
- l.append(" ")
- l.append(bc.index in labels and ">>" or " ")
- l.append(repr(bc.index).rjust(4))
- l.append(bc.name.ljust(20))
- l.append(bc.reprargstring())
- lines.append(" ".join(l))
- return "\n".join(lines)
-
- __repr__ = format
-
-def pydis(co):
- """return result of dissassembling a code object. """
-
- if hasattr(co, 'func_code'):
- co = co.func_code
-
- if hasattr(co, 'code'):
- co = co.code
-
- disresult = DisResult(co)
- code = co.co_code
-
- byte_increments = [ord(c) for c in co.co_lnotab[0::2]]
- line_increments = [ord(c) for c in co.co_lnotab[1::2]]
- table_length = len(byte_increments)
-
- lineno = co.co_firstlineno
- table_index = 0
- while (table_index < table_length
- and byte_increments[table_index] == 0):
- lineno += line_increments[table_index]
- table_index += 1
- addr = 0
- line_incr = 0
-
- n = len(code)
- i = 0
- extended_arg = 0
- while i < n:
- c = code[i]
- op = ord(c)
-
- if i >= addr:
- lineno += line_incr
- while table_index < table_length:
- addr += byte_increments[table_index]
- line_incr = line_increments[table_index]
- table_index += 1
- if line_incr:
- break
- else:
- addr = sys.maxint
- current_bytecodeindex = i
- i = i+1
- oparg = None
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
- extended_arg = 0
- i = i+2
- if op == EXTENDED_ARG:
- extended_arg = oparg*65536L
-
- disresult.append(current_bytecodeindex, oparg, lineno)
- assert disresult is not None
- return disresult
-
-def findlabels(code):
- """Detect all offsets in a byte code which are jump targets.
-
- Return the list of offsets.
-
- """
- labels = []
- n = len(code)
- i = 0
- while i < n:
- c = code[i]
- op = ord(c)
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
- i = i+2
More information about the pypy-commit
mailing list