[pypy-commit] pypy default: Add an official alternative format for debug_merge_point, for all interpreters != PyPy2
arigo
pypy.commits at gmail.com
Sun Nov 13 04:21:42 EST 2016
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r88336:66353dacc7ad
Date: 2016-11-13 10:20 +0100
http://bitbucket.org/pypy/pypy/changeset/66353dacc7ad/
Log: Add an official alternative format for debug_merge_point, for all
interpreters != PyPy2
diff --git a/rpython/tool/disassembler.py b/rpython/tool/disassembler.py
--- a/rpython/tool/disassembler.py
+++ b/rpython/tool/disassembler.py
@@ -50,6 +50,9 @@
current_lineno = opcode.lineno
self.source = source.split("\n")
+ def get_opcode_from_info(self, info):
+ return self.map[info.bytecode_no]
+
def _setup():
for opcode in opname:
if not opcode.startswith('<'):
diff --git a/rpython/tool/jitlogparser/parser.py b/rpython/tool/jitlogparser/parser.py
--- a/rpython/tool/jitlogparser/parser.py
+++ b/rpython/tool/jitlogparser/parser.py
@@ -11,15 +11,28 @@
filename = None
bytecode_no = 0
bytecode_name = None
- m = re.search('<code object ([<>\w]+)[\.,] file \'(.+?)\'[\.,] line (\d+)> #(\d+) (\w+)',
- arg)
+ mask = 0
+ # generic format: the numbers are 'startlineno-currentlineno',
+ # and this function returns currentlineno as the value
+ # 'bytecode_no = currentlineno ^ -1': i.e. it abuses bytecode_no,
+ # which doesn't make sense in the generic format, as a negative
+ # number
+ m = re.match(r'(.+?);(.+?):(\d+)-(\d+) (.*)', arg)
+ if m is not None:
+ mask = -1
+ else:
+ # PyPy2 format: bytecode_no is really a bytecode index,
+ # which must be turned into a real line number by parsing the
+ # source file
+ m = re.search(r'<code object ([<>\w]+)[\.,] file \'(.+?)\'[\.,] '
+ r'line (\d+)> #(\d+) (\w+)', arg)
if m is None:
# a non-code loop, like StrLiteralSearch or something
if arg:
bytecode_name = arg
else:
name, filename, lineno, bytecode_no, bytecode_name = m.groups()
- return name, bytecode_name, filename, int(lineno), int(bytecode_no)
+ return name, bytecode_name, filename, int(lineno), int(bytecode_no) ^ mask
class Op(object):
bridge = None
@@ -195,8 +208,9 @@
self.startlineno, self.bytecode_no) = parsed
self.operations = operations
self.storage = storage
+ generic_format = (self.bytecode_no < 0)
self.code = storage.disassemble_code(self.filename, self.startlineno,
- self.name)
+ self.name, generic_format)
def repr(self):
if self.filename is None:
@@ -213,7 +227,7 @@
def getopcode(self):
if self.code is None:
return None
- return self.code.map[self.bytecode_no]
+ return self.code.get_opcode_from_info(self)
def getlineno(self):
code = self.getopcode()
diff --git a/rpython/tool/jitlogparser/storage.py b/rpython/tool/jitlogparser/storage.py
--- a/rpython/tool/jitlogparser/storage.py
+++ b/rpython/tool/jitlogparser/storage.py
@@ -5,6 +5,7 @@
import py
import os
+import linecache
from rpython.tool.disassembler import dis
from rpython.tool.jitlogparser.module_finder import gather_all_code_objs
@@ -29,7 +30,10 @@
self.codes[fname] = res
return res
- def disassemble_code(self, fname, startlineno, name):
+ def disassemble_code(self, fname, startlineno, name, generic_format=False):
+ # 'generic_format' is False for PyPy2 (returns a
+ # disassembler.CodeRepresentation) or True otherwise (returns a
+ # GenericCode, without attempting any disassembly)
try:
if py.path.local(fname).check(file=False):
return None # cannot find source file
@@ -39,6 +43,10 @@
try:
return self.disassembled_codes[key]
except KeyError:
+ pass
+ if generic_format:
+ res = GenericCode(fname, startlineno, name)
+ else:
codeobjs = self.load_code(fname)
if (startlineno, name) not in codeobjs:
# cannot find the code obj at this line: this can happen for
@@ -50,8 +58,8 @@
return None
code = codeobjs[(startlineno, name)]
res = dis(code)
- self.disassembled_codes[key] = res
- return res
+ self.disassembled_codes[key] = res
+ return res
def reconnect_loops(self, loops):
""" Re-connect loops in a way that entry bridges are filtered out
@@ -80,3 +88,40 @@
res.append(loop)
self.loops = res
return res
+
+
+class GenericCode(object):
+ def __init__(self, fname, startlineno, name):
+ self._fname = fname
+ self._startlineno = startlineno
+ self._name = name
+ self._first_bytecodes = {} # {lineno: info}
+ self._source = None
+
+ def __repr__(self):
+ return 'GenericCode(%r, %r, %r)' % (
+ self._fname, self._startlineno, self._name)
+
+ def get_opcode_from_info(self, info):
+ lineno = ~info.bytecode_no
+ if self._first_bytecodes.setdefault(lineno, info) is info:
+ # this is the first opcode of the line---or, at least,
+ # the first time we ask for an Opcode on that line.
+ line_starts_here = True
+ else:
+ line_starts_here = False
+ return GenericOpcode(lineno, line_starts_here)
+
+ @property
+ def source(self):
+ if self._source is None:
+ src = linecache.getlines(self._fname)
+ if self._startlineno > 0:
+ src = src[self._startlineno - 1:]
+ self._source = [s.rstrip('\n\r') for s in src]
+ return self._source
+
+class GenericOpcode(object):
+ def __init__(self, lineno, line_starts_here):
+ self.lineno = lineno
+ self.line_starts_here = line_starts_here
diff --git a/rpython/tool/jitlogparser/test/test_parser.py b/rpython/tool/jitlogparser/test/test_parser.py
--- a/rpython/tool/jitlogparser/test/test_parser.py
+++ b/rpython/tool/jitlogparser/test/test_parser.py
@@ -2,7 +2,8 @@
Function, adjust_bridges,
import_log, split_trace, Op,
parse_log_counts)
-from rpython.tool.jitlogparser.storage import LoopStorage
+from rpython.tool.jitlogparser.storage import LoopStorage, GenericCode
+from rpython.tool.udir import udir
import py, sys
from rpython.jit.backend.detect_cpu import autodetect
from rpython.jit.backend.tool.viewcode import ObjdumpNotFound
@@ -381,4 +382,58 @@
""")
f = Function.from_operations(loop.operations, LoopStorage())
assert len(f.chunks) == 2
-
+
+def test_embedded_lineno():
+ # debug_merge_point() can have a text that is either:
+ #
+ # * the PyPy2 format: <code object %s. file '%s'. line %d> #%d %s
+ # funcname, filename, lineno, bytecode_no, bytecode_name
+ #
+ # * a standard text of the form %s;%s:%d-%d %s
+ # funcname, filename, startlineno, curlineno, anything
+ #
+ # * or anything else, which is not specially recognized but shouldn't crash
+ #
+ sourcefile = str(udir.join('test_embedded_lineno.src'))
+ with open(sourcefile, 'w') as f:
+ print >> f, "A#1"
+ print >> f, "B#2"
+ print >> f, "C#3"
+ print >> f, "D#4"
+ print >> f, "E#5"
+ print >> f, "F#6"
+ loop = parse("""
+ []
+ debug_merge_point(0, 0, 'myfunc;%(filename)s:2-2 one')
+ debug_merge_point(0, 0, 'myfunc;%(filename)s:2-2 two')
+ debug_merge_point(0, 0, 'myfunc;%(filename)s:2-4 three')
+ debug_merge_point(0, 0, 'myfunc;%(filename)s:2-4 four')
+ """ % {'filename': sourcefile})
+ f = Function.from_operations(loop.operations, LoopStorage())
+
+ expect = [(2, 'one', True),
+ (2, 'two', False),
+ (4, 'three', True),
+ (4, 'four', False)]
+ assert len(f.chunks) == len(expect)
+
+ code_seen = set()
+ for chunk, (expected_lineno,
+ expected_bytecode_name,
+ expected_line_starts_here) in zip(f.chunks, expect):
+ assert chunk.name == 'myfunc'
+ assert chunk.bytecode_name == expected_bytecode_name
+ assert chunk.filename == sourcefile
+ assert chunk.startlineno == 2
+ assert chunk.bytecode_no == ~expected_lineno # half-abuse
+ assert chunk.has_valid_code()
+ assert chunk.lineno == expected_lineno
+ assert chunk.line_starts_here == expected_line_starts_here
+ code_seen.add(chunk.code)
+
+ assert len(code_seen) == 1
+ code, = code_seen
+ assert code.source[0] == "B#2"
+ assert code.source[1] == "C#3"
+ assert code.source[4] == "F#6"
+ py.test.raises(IndexError, "code.source[5]")
More information about the pypy-commit
mailing list