[pypy-svn] pypy default: merge jit-lsprofile
berdario
commits-noreply at bitbucket.org
Mon Apr 25 13:47:37 CEST 2011
Author: Dario Bertini <berdario at gmail.com>
Branch:
Changeset: r43573:6cfa4ea038ab
Date: 2011-04-25 13:41 +0200
http://bitbucket.org/pypy/pypy/changeset/6cfa4ea038ab/
Log: merge jit-lsprofile
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -532,7 +532,10 @@
raise LLFatalError(msg, LLException(ll_exc_type, ll_exc))
def op_debug_llinterpcall(self, pythonfunction, *args_ll):
- return pythonfunction(*args_ll)
+ try:
+ return pythonfunction(*args_ll)
+ except:
+ self.make_llexception()
def op_debug_start_traceback(self, *args):
pass # xxx write debugging code here?
diff --git a/pypy/translator/c/src/debug_print.h b/pypy/translator/c/src/debug_print.h
--- a/pypy/translator/c/src/debug_print.h
+++ b/pypy/translator/c/src/debug_print.h
@@ -20,7 +20,6 @@
Note that 'fname' can be '-' to send the logging data to stderr.
*/
-
/* macros used by the generated code */
#define PYPY_HAVE_DEBUG_PRINTS (pypy_have_debug_prints & 1 ? \
(pypy_debug_ensure_opened(), 1) : 0)
@@ -40,174 +39,24 @@
extern long pypy_have_debug_prints;
extern FILE *pypy_debug_file;
+#define OP_LL_READ_TIMESTAMP(val) READ_TIMESTAMP(val)
-/* implementations */
+#include "src/asm.h"
-#ifndef PYPY_NOT_MAIN_FILE
-#include <string.h>
-
-#if defined(__GNUC__) && defined(__linux__)
-# include <sched.h>
- static void pypy_setup_profiling()
- {
- cpu_set_t set;
- CPU_ZERO(&set);
- CPU_SET(0, &set); /* restrict to a single cpu */
- sched_setaffinity(0, sizeof(cpu_set_t), &set);
- }
-#else
-static void pypy_setup_profiling() { }
-#endif
-
-long pypy_have_debug_prints = -1;
-FILE *pypy_debug_file = NULL;
-static bool_t debug_ready = 0;
-static bool_t debug_profile = 0;
-static char *debug_start_colors_1 = "";
-static char *debug_start_colors_2 = "";
-static char *debug_stop_colors = "";
-static char *debug_prefix = NULL;
-
-static void pypy_debug_open(void)
-{
- char *filename = getenv("PYPYLOG");
- if (filename)
-#ifndef MS_WINDOWS
- unsetenv("PYPYLOG"); /* don't pass it to subprocesses */
-#else
- putenv("PYPYLOG="); /* don't pass it to subprocesses */
-#endif
- if (filename && filename[0])
- {
- char *colon = strchr(filename, ':');
- if (!colon)
- {
- /* PYPYLOG=filename --- profiling version */
- debug_profile = 1;
- pypy_setup_profiling();
- }
- else
- {
- /* PYPYLOG=prefix:filename --- conditional logging */
- int n = colon - filename;
- debug_prefix = malloc(n + 1);
- memcpy(debug_prefix, filename, n);
- debug_prefix[n] = '\0';
- filename = colon + 1;
- }
- if (strcmp(filename, "-") != 0)
- pypy_debug_file = fopen(filename, "w");
- }
- if (!pypy_debug_file)
- {
- pypy_debug_file = stderr;
- if (isatty(2))
- {
- debug_start_colors_1 = "\033[1m\033[31m";
- debug_start_colors_2 = "\033[31m";
- debug_stop_colors = "\033[0m";
- }
- }
- debug_ready = 1;
-}
-
-void pypy_debug_ensure_opened(void)
-{
- if (!debug_ready)
- pypy_debug_open();
-}
-
-
-#ifndef READ_TIMESTAMP
/* asm_xxx.h may contain a specific implementation of READ_TIMESTAMP.
* This is the default generic timestamp implementation.
*/
+#ifndef READ_TIMESTAMP
+
# ifdef _WIN32
# define READ_TIMESTAMP(val) QueryPerformanceCounter((LARGE_INTEGER*)&(val))
# else
# include <time.h>
# include <sys/time.h>
+
+long long pypy_read_timestamp();
+
# define READ_TIMESTAMP(val) (val) = pypy_read_timestamp()
- static long long pypy_read_timestamp(void)
- {
-# ifdef CLOCK_THREAD_CPUTIME_ID
- struct timespec tspec;
- clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);
- return ((long long)tspec.tv_sec) * 1000000000LL + tspec.tv_nsec;
-# else
- /* argh, we don't seem to have clock_gettime(). Bad OS. */
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return ((long long)tv.tv_sec) * 1000000LL + tv.tv_usec;
-# endif
- }
# endif
#endif
-
-
-static bool_t startswithoneof(const char *str, const char *substr)
-{
- const char *p = str;
- for (; *substr; substr++)
- {
- if (*substr != ',')
- {
- if (p && *p++ != *substr)
- p = NULL; /* mismatch */
- }
- else if (p != NULL)
- return 1; /* match */
- else
- p = str; /* mismatched, retry with the next */
- }
- return p != NULL;
-}
-
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#define PYPY_LONG_LONG_PRINTF_FORMAT "I64"
-#else
-#define PYPY_LONG_LONG_PRINTF_FORMAT "ll"
-#endif
-
-static void display_startstop(const char *prefix, const char *postfix,
- const char *category, const char *colors)
-{
- long long timestamp;
- READ_TIMESTAMP(timestamp);
- fprintf(pypy_debug_file, "%s[%"PYPY_LONG_LONG_PRINTF_FORMAT"x] %s%s%s\n%s",
- colors,
- timestamp, prefix, category, postfix,
- debug_stop_colors);
-}
-
-void pypy_debug_start(const char *category)
-{
- pypy_debug_ensure_opened();
- /* Enter a nesting level. Nested debug_prints are disabled by default
- because the following left shift introduces a 0 in the last bit.
- Note that this logic assumes that we are never going to nest
- debug_starts more than 31 levels (63 on 64-bits). */
- pypy_have_debug_prints <<= 1;
- if (!debug_profile)
- {
- /* non-profiling version */
- if (!debug_prefix || !startswithoneof(category, debug_prefix))
- {
- /* wrong section name, or no PYPYLOG at all, skip it */
- return;
- }
- /* else make this subsection active */
- pypy_have_debug_prints |= 1;
- }
- display_startstop("{", "", category, debug_start_colors_1);
-}
-
-void pypy_debug_stop(const char *category)
-{
- if (debug_profile | (pypy_have_debug_prints & 1))
- display_startstop("", "}", category, debug_start_colors_2);
- pypy_have_debug_prints >>= 1;
-}
-
-#endif /* PYPY_NOT_MAIN_FILE */
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2100,6 +2100,23 @@
assert self.meta_interp(f, [5, 100]) == 0
self.check_loops(int_rshift=1, everywhere=True)
+
+ def test_read_timestamp(self):
+ import time
+ from pypy.rlib.rtimer import read_timestamp
+ def busy_loop():
+ start = time.time()
+ while time.time() - start < 0.1:
+ # busy wait
+ pass
+
+ def f():
+ t1 = read_timestamp()
+ busy_loop()
+ t2 = read_timestamp()
+ return t2 - t1 > 1000
+ res = self.interp_operations(f, [])
+ assert res
class TestOOtype(BasicTests, OOJitMixin):
diff --git a/pypy/translator/c/src/debug_print.c b/pypy/translator/c/src/debug_print.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/debug_print.c
@@ -0,0 +1,150 @@
+
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include "src/profiling.h"
+#include "src/debug_print.h"
+
+/* Stack of "section enabled" bits, one per nested debug_start: the lowest
+   bit belongs to the innermost section (see pypy_debug_start/stop).  It
+   starts with all bits set. */
+long pypy_have_debug_prints = -1;
+/* Stream receiving the log; set by pypy_debug_open() to either the file
+   named in PYPYLOG or stderr. */
+FILE *pypy_debug_file = NULL;
+/* Set to 1 once pypy_debug_open() has run. */
+static unsigned char debug_ready = 0;
+/* Set to 1 when PYPYLOG has the "filename" form (no colon): profiling mode. */
+static unsigned char debug_profile = 0;
+/* Escape sequences wrapped around start/stop lines; they stay empty unless
+   the log goes to stderr and stderr is a terminal. */
+static char *debug_start_colors_1 = "";
+static char *debug_start_colors_2 = "";
+static char *debug_stop_colors = "";
+/* malloc'ed copy of the "prefix" part of PYPYLOG=prefix:filename, or NULL. */
+static char *debug_prefix = NULL;
+
+/* Parse the PYPYLOG environment variable and set up the logging state.
+ *
+ *   PYPYLOG=filename         profiling version: sets debug_profile and
+ *                            calls pypy_setup_profiling()
+ *   PYPYLOG=prefix:filename  conditional logging: 'prefix' is copied into
+ *                            debug_prefix
+ *
+ * A filename of "-" (or a file that cannot be opened) sends the output to
+ * stderr, with colors enabled when stderr is a terminal.
+ *
+ * The variable is removed from the environment only after its value has
+ * been fully consumed: the pointer returned by getenv() may be invalidated
+ * by unsetenv()/putenv().
+ */
+static void pypy_debug_open(void)
+{
+    char *filename = getenv("PYPYLOG");
+
+    if (filename && filename[0])
+    {
+        char *colon = strchr(filename, ':');
+        if (!colon)
+        {
+            /* PYPYLOG=filename --- profiling version */
+            debug_profile = 1;
+            pypy_setup_profiling();
+        }
+        else
+        {
+            /* PYPYLOG=prefix:filename --- conditional logging */
+            size_t n = (size_t)(colon - filename);
+            debug_prefix = malloc(n + 1);
+            if (debug_prefix)
+            {
+                memcpy(debug_prefix, filename, n);
+                debug_prefix[n] = '\0';
+            }
+            /* else: out of memory; debug_prefix stays NULL, which
+               pypy_debug_start() treats as "no section enabled" */
+            filename = colon + 1;
+        }
+        if (strcmp(filename, "-") != 0)
+            pypy_debug_file = fopen(filename, "w");
+    }
+    if (filename)
+    {
+        /* 'filename' must not be used past this point */
+#ifndef MS_WINDOWS
+        unsetenv("PYPYLOG");   /* don't pass it to subprocesses */
+#else
+        putenv("PYPYLOG=");    /* don't pass it to subprocesses */
+#endif
+    }
+    if (!pypy_debug_file)
+    {
+        pypy_debug_file = stderr;
+        if (isatty(2))
+        {
+            debug_start_colors_1 = "\033[1m\033[31m";
+            debug_start_colors_2 = "\033[31m";
+            debug_stop_colors = "\033[0m";
+        }
+    }
+    debug_ready = 1;
+}
+
+/* Run pypy_debug_open() the first time this is called; later calls are
+   no-ops because debug_ready is already set. */
+void pypy_debug_ensure_opened(void)
+{
+    if (debug_ready)
+        return;
+    pypy_debug_open();
+}
+
+
+#ifndef _WIN32
+
+/* Generic timestamp source behind the default READ_TIMESTAMP() macro.
+ *
+ * This function must have external linkage: debug_print.h declares it as
+ * 'long long pypy_read_timestamp();' and READ_TIMESTAMP() expands to a
+ * call to it, so a 'static' definition here would conflict with that
+ * declaration.
+ *
+ * Returns nanoseconds read from CLOCK_THREAD_CPUTIME_ID when that clock
+ * is available at compile time, otherwise microseconds from
+ * gettimeofday().
+ */
+long long pypy_read_timestamp(void)
+{
+# ifdef CLOCK_THREAD_CPUTIME_ID
+    struct timespec tspec;
+    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec);
+    return ((long long)tspec.tv_sec) * 1000000000LL + tspec.tv_nsec;
+# else
+    /* argh, we don't seem to have clock_gettime().  Bad OS. */
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return ((long long)tv.tv_sec) * 1000000LL + tv.tv_usec;
+# endif
+}
+#endif
+
+
+/* Return 1 if 'str' starts with at least one of the comma-separated
+   alternatives listed in 'substr', and 0 otherwise.  An empty alternative
+   matches any string. */
+static unsigned char startswithoneof(const char *str, const char *substr)
+{
+    const char *cursor = str;   /* NULL once the current alternative failed */
+    const char *alt;
+
+    for (alt = substr; *alt != '\0'; alt++)
+    {
+        if (*alt == ',')
+        {
+            /* end of one alternative */
+            if (cursor != NULL)
+                return 1;       /* it matched entirely */
+            cursor = str;       /* it failed: retry with the next one */
+            continue;
+        }
+        if (cursor == NULL)
+            continue;           /* skip the rest of a failed alternative */
+        if (*cursor != *alt)
+            cursor = NULL;      /* mismatch */
+        else
+            cursor++;
+    }
+    return cursor != NULL;
+}
+
+/* printf length modifier used below to print a 'long long' */
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#define PYPY_LONG_LONG_PRINTF_FORMAT "I64"
+#else
+#define PYPY_LONG_LONG_PRINTF_FORMAT "ll"
+#endif
+
+/* Write one section marker line to pypy_debug_file:
+   'colors', then "[<timestamp in hex>] <prefix><category><postfix>" and a
+   newline, then debug_stop_colors.  The timestamp is read with
+   READ_TIMESTAMP() at the time of the call. */
+static void display_startstop(const char *prefix, const char *postfix,
+ const char *category, const char *colors)
+{
+ long long timestamp;
+ READ_TIMESTAMP(timestamp);
+ /* NOTE(review): the 'x' conversion formally expects an unsigned argument
+    while 'timestamp' is a signed long long -- confirm this is acceptable
+    on all supported compilers */
+ fprintf(pypy_debug_file, "%s[%"PYPY_LONG_LONG_PRINTF_FORMAT"x] %s%s%s\n%s",
+ colors,
+ timestamp, prefix, category, postfix,
+ debug_stop_colors);
+}
+
+/* Open a logging section named 'category'.
+ *
+ * Pushes one nesting level onto pypy_have_debug_prints.  In profiling mode
+ * the start marker is always written.  Otherwise the section is enabled
+ * (and its marker written) only if 'category' starts with one of the
+ * prefixes given in PYPYLOG.
+ */
+void pypy_debug_start(const char *category)
+{
+    pypy_debug_ensure_opened();
+    /* Enter a nesting level.  Nested debug_prints are disabled by default
+       because the following left shift introduces a 0 in the last bit.
+       Note that this logic assumes that we are never going to nest
+       debug_starts more than 31 levels (63 on 64-bits).
+       The shift is done on the unsigned type: pypy_have_debug_prints
+       starts at -1, and left-shifting a negative signed value is
+       undefined behaviour in ISO C. */
+    pypy_have_debug_prints =
+        (long)((unsigned long)pypy_have_debug_prints << 1);
+    if (!debug_profile)
+    {
+        /* non-profiling version */
+        if (!debug_prefix || !startswithoneof(category, debug_prefix))
+        {
+            /* wrong section name, or no PYPYLOG at all, skip it */
+            return;
+        }
+        /* else make this subsection active */
+        pypy_have_debug_prints |= 1;
+    }
+    display_startstop("{", "", category, debug_start_colors_1);
+}
+
+/* Close the logging section named 'category': write the stop marker if we
+   are in profiling mode or if the section was enabled, then pop one
+   nesting level from pypy_have_debug_prints. */
+void pypy_debug_stop(const char *category)
+{
+    int section_active = (pypy_have_debug_prints & 1) != 0;
+
+    if (debug_profile || section_active)
+        display_startstop("", "}", category, debug_start_colors_2);
+    /* leave the nesting level */
+    pypy_have_debug_prints >>= 1;
+}
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -16,6 +16,7 @@
from pypy.rlib.objectmodel import compute_hash
+ is_64_bit = True
supports_longlong = False
r_float_storage = float
FLOATSTORAGE = lltype.Float
@@ -32,6 +33,7 @@
from pypy.rlib import rarithmetic, longlong2float
+ is_64_bit = False
supports_longlong = True
r_float_storage = rarithmetic.r_longlong
FLOATSTORAGE = lltype.SignedLongLong
diff --git a/pypy/module/pypyjit/interp_jit.py b/pypy/module/pypyjit/interp_jit.py
--- a/pypy/module/pypyjit/interp_jit.py
+++ b/pypy/module/pypyjit/interp_jit.py
@@ -20,33 +20,33 @@
'fastlocals_w[*]',
'last_exception',
'lastblock',
+ 'is_being_profiled',
]
JUMP_ABSOLUTE = opmap['JUMP_ABSOLUTE']
-def get_printable_location(next_instr, bytecode):
+def get_printable_location(next_instr, is_being_profiled, bytecode):
from pypy.tool.stdlib_opcode import opcode_method_names
name = opcode_method_names[ord(bytecode.co_code[next_instr])]
return '%s #%d %s' % (bytecode.get_repr(), next_instr, name)
-def get_jitcell_at(next_instr, bytecode):
- return bytecode.jit_cells.get(next_instr, None)
+def get_jitcell_at(next_instr, is_being_profiled, bytecode):
+ return bytecode.jit_cells.get((next_instr, is_being_profiled), None)
-def set_jitcell_at(newcell, next_instr, bytecode):
- bytecode.jit_cells[next_instr] = newcell
+def set_jitcell_at(newcell, next_instr, is_being_profiled, bytecode):
+ bytecode.jit_cells[next_instr, is_being_profiled] = newcell
-def confirm_enter_jit(next_instr, bytecode, frame, ec):
+def confirm_enter_jit(next_instr, is_being_profiled, bytecode, frame, ec):
return (frame.w_f_trace is None and
- ec.profilefunc is None and
ec.w_tracefunc is None)
-def can_never_inline(next_instr, bytecode):
+def can_never_inline(next_instr, is_being_profiled, bytecode):
return (bytecode.co_flags & CO_GENERATOR) != 0
class PyPyJitDriver(JitDriver):
reds = ['frame', 'ec']
- greens = ['next_instr', 'pycode']
+ greens = ['next_instr', 'is_being_profiled', 'pycode']
virtualizables = ['frame']
## def compute_invariants(self, reds, next_instr, pycode):
@@ -68,13 +68,16 @@
def dispatch(self, pycode, next_instr, ec):
self = hint(self, access_directly=True)
next_instr = r_uint(next_instr)
+ is_being_profiled = self.is_being_profiled
try:
while True:
pypyjitdriver.jit_merge_point(ec=ec,
- frame=self, next_instr=next_instr, pycode=pycode)
+ frame=self, next_instr=next_instr, pycode=pycode,
+ is_being_profiled=is_being_profiled)
co_code = pycode.co_code
self.valuestackdepth = hint(self.valuestackdepth, promote=True)
next_instr = self.handle_bytecode(co_code, next_instr, ec)
+ is_being_profiled = self.is_being_profiled
except ExitFrame:
return self.popvalue()
@@ -97,7 +100,8 @@
jumpto = r_uint(self.last_instr)
#
pypyjitdriver.can_enter_jit(frame=self, ec=ec, next_instr=jumpto,
- pycode=self.getcode())
+ pycode=self.getcode(),
+ is_being_profiled=self.is_being_profiled)
return jumpto
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -422,12 +422,12 @@
# Method names take the form of
-#
+#
# <instruction name>_<operand type codes>
#
# For example, the method name for "mov reg, immed" is MOV_ri. Operand order
# is Intel-style, with the destination first.
-#
+#
# The operand type codes are:
# r - register
# b - ebp/rbp offset
@@ -565,6 +565,9 @@
# x87 instructions
FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+ # ------------------------------ Random mess -----------------------
+ RDTSC = insn('\x0F\x31')
+
# reserved as an illegal instruction
UD2 = insn('\x0F\x0B')
diff --git a/pypy/jit/tl/pypyjit_demo.py b/pypy/jit/tl/pypyjit_demo.py
--- a/pypy/jit/tl/pypyjit_demo.py
+++ b/pypy/jit/tl/pypyjit_demo.py
@@ -1,17 +1,19 @@
try:
- def main(n):
- def g(n):
- return range(n)
- s = 0
- for i in range(n): # ID: for
- tmp = g(n)
- s += tmp[i] # ID: getitem
- a = 0
- return s
- main(10)
-
+ def g(x):
+ return x - 1
+ def f(x):
+ while x:
+ x = g(x)
+ import cProfile
+ import time
+ t1 = time.time()
+ cProfile.run("f(10000000)")
+ t2 = time.time()
+ f(10000000)
+ t3 = time.time()
+ print t2 - t1, t3 - t2, (t3 - t2) / (t2 - t1)
except Exception, e:
print "Exception: ", type(e)
print e
-
+
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -915,6 +915,14 @@
from pypy.rlib.rarithmetic import LONG_BIT
defines['PYPY_LONG_BIT'] = LONG_BIT
+def add_extra_files(eci):
+ srcdir = py.path.local(autopath.pypydir).join('translator', 'c', 'src')
+ files = [
+ srcdir / 'profiling.c',
+ srcdir / 'debug_print.c',
+ ]
+ return eci.merge(ExternalCompilationInfo(separate_module_files=files))
+
def gen_source_standalone(database, modulename, targetdir, eci,
entrypointname, defines={}):
assert database.standalone
@@ -964,6 +972,7 @@
print >>fi, "#define INSTRUMENT_NCOUNTER %d" % n
fi.close()
+ eci = add_extra_files(eci)
eci = eci.convert_sources_to_files(being_main=True)
files, eci = eci.get_module_files()
return eci, filename, sg.getextrafiles() + list(files)
@@ -1010,6 +1019,7 @@
gen_startupcode(f, database)
f.close()
+ eci = add_extra_files(eci)
eci = eci.convert_sources_to_files(being_main=True)
files, eci = eci.get_module_files()
return eci, filename, sg.getextrafiles() + list(files)
diff --git a/pypy/rlib/debug.py b/pypy/rlib/debug.py
--- a/pypy/rlib/debug.py
+++ b/pypy/rlib/debug.py
@@ -175,6 +175,7 @@
c_pythonfunction = hop.inputconst(lltype.Void, pythonfunction)
args_v = [hop.inputarg(hop.args_r[i], arg=i)
for i in range(2, hop.nb_args)]
+ hop.exception_is_here()
return hop.genop('debug_llinterpcall', [c_pythonfunction] + args_v,
resulttype=RESTYPE)
diff --git a/pypy/rpython/test/test_llinterp.py b/pypy/rpython/test/test_llinterp.py
--- a/pypy/rpython/test/test_llinterp.py
+++ b/pypy/rpython/test/test_llinterp.py
@@ -658,3 +658,25 @@
assert x == -42
res = interpret(f, [])
+
+def test_raising_llimpl():
+ from pypy.rpython.extfunc import register_external
+
+ def external():
+ pass
+
+ def raising():
+ raise OSError(15, "abcd")
+
+ ext = register_external(external, [], llimpl=raising, llfakeimpl=raising)
+
+ def f():
+ # this is a useful llfakeimpl that raises an exception
+ try:
+ external()
+ return True
+ except OSError:
+ return False
+
+ res = interpret(f, [])
+ assert not res
diff --git a/pypy/rlib/test/test_rtimer.py b/pypy/rlib/test/test_rtimer.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/test/test_rtimer.py
@@ -0,0 +1,28 @@
+import time
+
+from pypy.rlib.rtimer import read_timestamp
+from pypy.rpython.test.test_llinterp import interpret
+from pypy.translator.c.test.test_genc import compile
+
+def timer():
+ t1 = read_timestamp()
+ start = time.time()
+ while time.time() - start < 0.1:
+ # busy wait
+ pass
+ t2 = read_timestamp()
+ return t2 - t1
+
+def test_timer():
+ diff = timer()
+ # We're counting ticks, verify they look correct
+ assert diff > 1000
+
+def test_annotation():
+ diff = interpret(timer, [])
+ assert diff > 1000
+
+def test_compile_c():
+ function = compile(timer, [])
+ diff = function()
+ assert diff > 1000
\ No newline at end of file
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -367,7 +367,7 @@
self.assembler.regalloc_perform_guard(guard_op, faillocs, arglocs,
result_loc,
current_depths)
- self.possibly_free_vars(guard_op.getfailargs())
+ self.possibly_free_vars(guard_op.getfailargs())
def PerformDiscard(self, op, arglocs):
if not we_are_translated():
@@ -443,7 +443,7 @@
assert isinstance(arg, Box)
if arg not in last_used:
last_used[arg] = i
-
+
longevity = {}
for arg in produced:
if arg in last_used:
@@ -837,7 +837,7 @@
self._call(op, [imm(size), vable] +
[self.loc(op.getarg(i)) for i in range(op.numargs())],
guard_not_forced_op=guard_op)
-
+
def consider_cond_call_gc_wb(self, op):
assert op.result is None
args = op.getarglist()
@@ -1217,6 +1217,29 @@
else:
raise AssertionError("bad unicode item size")
+ def consider_read_timestamp(self, op):
+ tmpbox_high = TempBox()
+ self.rm.force_allocate_reg(tmpbox_high, selected_reg=eax)
+ if longlong.is_64_bit:
+ # on 64-bit, use rax as a temporary register and return the
+ # result in rdx
+ result_loc = self.rm.force_allocate_reg(op.result,
+ selected_reg=edx)
+ self.Perform(op, [], result_loc)
+ else:
+ # on 32-bit, use both eax and edx as temporary registers,
+ # use a temporary xmm register, and return the result in
+ # another xmm register.
+ tmpbox_low = TempBox()
+ self.rm.force_allocate_reg(tmpbox_low, selected_reg=edx)
+ xmmtmpbox = TempBox()
+ xmmtmploc = self.xrm.force_allocate_reg(xmmtmpbox)
+ result_loc = self.xrm.force_allocate_reg(op.result)
+ self.Perform(op, [xmmtmploc], result_loc)
+ self.xrm.possibly_free_var(xmmtmpbox)
+ self.rm.possibly_free_var(tmpbox_low)
+ self.rm.possibly_free_var(tmpbox_high)
+
def consider_jump(self, op):
assembler = self.assembler
assert self.jump_target_descr is None
diff --git a/pypy/translator/goal/translate.py b/pypy/translator/goal/translate.py
--- a/pypy/translator/goal/translate.py
+++ b/pypy/translator/goal/translate.py
@@ -221,12 +221,14 @@
pdb_plus_show = PdbPlusShow(t) # need a translator to support extended commands
- def debug(got_error):
+ def finish_profiling():
if prof:
prof.disable()
statfilename = 'prof.dump'
log.info('Dumping profiler stats to: %s' % statfilename)
- prof.dump_stats(statfilename)
+ prof.dump_stats(statfilename)
+
+ def debug(got_error):
tb = None
if got_error:
import traceback
@@ -302,9 +304,11 @@
except SystemExit:
raise
except:
+ finish_profiling()
debug(True)
raise SystemExit(1)
else:
+ finish_profiling()
if translateconfig.pdb:
debug(False)
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -5,7 +5,8 @@
from pypy.rpython.lltypesystem import lltype, llmemory, rstr
from pypy.rpython.ootypesystem import ootype
from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.rlib.rarithmetic import ovfcheck, r_uint, intmask
+from pypy.rlib.rarithmetic import ovfcheck, r_uint, intmask, r_longlong
+from pypy.rlib.rtimer import read_timestamp
from pypy.rlib.unroll import unrolling_iterable
from pypy.jit.metainterp.history import BoxInt, BoxPtr, BoxFloat, check_descr
from pypy.jit.metainterp.history import INT, REF, FLOAT, VOID, AbstractDescr
@@ -227,6 +228,15 @@
length = lengthbox.getint()
rstr.copy_unicode_contents(src, dst, srcstart, dststart, length)
+def do_read_timestamp(cpu, _):
+ x = read_timestamp()
+ if longlong.is_64_bit:
+ assert isinstance(x, int) # 64-bit
+ return BoxInt(x)
+ else:
+ assert isinstance(x, r_longlong) # 32-bit
+ return BoxFloat(x)
+
# ____________________________________________________________
##def do_force_token(cpu):
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -25,6 +25,7 @@
from pypy.rlib.objectmodel import ComputedIntSymbolic, we_are_translated
from pypy.rlib.rarithmetic import ovfcheck
from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
+from pypy.rlib.rtimer import read_timestamp
import py
from pypy.tool.ansi_print import ansi_log
@@ -506,7 +507,7 @@
', '.join(map(str, args)),))
self.fail_args = args
return op.fail_index
-
+
else:
assert 0, "unknown final operation %d" % (op.opnum,)
@@ -856,6 +857,9 @@
opaque_frame = _to_opaque(self)
return llmemory.cast_ptr_to_adr(opaque_frame)
+ def op_read_timestamp(self, descr):
+ return read_timestamp()
+
def op_call_may_force(self, calldescr, func, *args):
assert not self._forced
self._may_force = self.opindex
@@ -937,7 +941,7 @@
class OOFrame(Frame):
OPHANDLERS = [None] * (rop._LAST+1)
-
+
def op_new_with_vtable(self, descr, vtable):
assert descr is None
typedescr = get_class_size(self.memocast, vtable)
@@ -958,7 +962,7 @@
return res
op_getfield_gc_pure = op_getfield_gc
-
+
def op_setfield_gc(self, fielddescr, obj, newvalue):
TYPE = fielddescr.TYPE
fieldname = fielddescr.fieldname
diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py
--- a/pypy/module/_lsprof/interp_lsprof.py
+++ b/pypy/module/_lsprof/interp_lsprof.py
@@ -1,12 +1,39 @@
+import py
from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.function import Method, Function
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
interp_attrproperty)
-from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
-from pypy.interpreter.function import Method, Function
-from pypy.interpreter.error import OperationError
+from pypy.rlib import jit
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rtimer import read_timestamp, _is_64_bit
+from pypy.rpython.lltypesystem import rffi, lltype
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.tool.autopath import pypydir
+from pypy.rlib.rarithmetic import r_longlong
+
import time, sys
+# cpu affinity settings
+
+srcdir = py.path.local(pypydir).join('translator', 'c', 'src')
+eci = ExternalCompilationInfo(separate_module_files=
+ [srcdir.join('profiling.c')])
+
+c_setup_profiling = rffi.llexternal('pypy_setup_profiling',
+ [], lltype.Void,
+ compilation_info = eci)
+c_teardown_profiling = rffi.llexternal('pypy_teardown_profiling',
+ [], lltype.Void,
+ compilation_info = eci)
+
+if _is_64_bit:
+ timer_size_int = int
+else:
+ timer_size_int = r_longlong
+
class W_StatsEntry(Wrappable):
def __init__(self, space, frame, callcount, reccallcount, tt, it,
w_sublist):
@@ -74,20 +101,43 @@
l_w = []
for v in values:
if v.callcount != 0:
- l_w.append(v.stats(space, factor))
+ l_w.append(v.stats(space, None, factor))
return space.newlist(l_w)
-class ProfilerEntry(object):
+class ProfilerSubEntry(object):
def __init__(self, frame):
self.frame = frame
- self.tt = 0
- self.it = 0
+ self.ll_tt = r_longlong(0)
+ self.ll_it = r_longlong(0)
self.callcount = 0
self.recursivecallcount = 0
self.recursionLevel = 0
+
+ def stats(self, space, parent, factor):
+ w_sse = W_StatsSubEntry(space, self.frame,
+ self.callcount, self.recursivecallcount,
+ factor * float(self.ll_tt),
+ factor * float(self.ll_it))
+ return space.wrap(w_sse)
+
+ def _stop(self, tt, it):
+ if not we_are_translated():
+ assert type(tt) is timer_size_int
+ assert type(it) is timer_size_int
+ self.recursionLevel -= 1
+ if self.recursionLevel == 0:
+ self.ll_tt += tt
+ else:
+ self.recursivecallcount += 1
+ self.ll_it += it
+ self.callcount += 1
+
+class ProfilerEntry(ProfilerSubEntry):
+ def __init__(self, frame):
+ ProfilerSubEntry.__init__(self, frame)
self.calls = {}
- def stats(self, space, factor):
+ def stats(self, space, dummy, factor):
if self.calls:
w_sublist = space.newlist([sub_entry.stats(space, self, factor)
for sub_entry in self.calls.values()])
@@ -95,67 +145,44 @@
w_sublist = space.w_None
w_se = W_StatsEntry(space, self.frame, self.callcount,
self.recursivecallcount,
- factor * self.tt, factor * self.it, w_sublist)
+ factor * float(self.ll_tt),
+ factor * float(self.ll_it), w_sublist)
return space.wrap(w_se)
-class ProfilerSubEntry(object):
- def __init__(self, frame):
- self.frame = frame
- self.tt = 0
- self.it = 0
- self.callcount = 0
- self.recursivecallcount = 0
- self.recursionLevel = 0
-
- def stats(self, space, parent, factor):
- w_sse = W_StatsSubEntry(space, self.frame,
- self.callcount, self.recursivecallcount,
- factor * self.tt, factor * self.it)
- return space.wrap(w_sse)
+ @jit.purefunction
+ def _get_or_make_subentry(self, entry, make=True):
+ try:
+ return self.calls[entry]
+ except KeyError:
+ if make:
+ subentry = ProfilerSubEntry(entry.frame)
+ self.calls[entry] = subentry
+ return subentry
+ return None
class ProfilerContext(object):
def __init__(self, profobj, entry):
self.entry = entry
- self.subt = 0
+ self.ll_subt = timer_size_int(0)
self.previous = profobj.current_context
entry.recursionLevel += 1
if profobj.subcalls and self.previous:
- caller = self.previous.entry
- try:
- subentry = caller.calls[entry]
- except KeyError:
- subentry = ProfilerSubEntry(entry.frame)
- caller.calls[entry] = subentry
+ caller = jit.hint(self.previous.entry, promote=True)
+ subentry = caller._get_or_make_subentry(entry)
subentry.recursionLevel += 1
- self.t0 = profobj.timer()
+ self.ll_t0 = profobj.ll_timer()
def _stop(self, profobj, entry):
- # XXX factor out two pieces of the same code
- tt = profobj.timer() - self.t0
- it = tt - self.subt
+ tt = profobj.ll_timer() - self.ll_t0
+ it = tt - self.ll_subt
if self.previous:
- self.previous.subt += tt
- entry.recursionLevel -= 1
- if entry.recursionLevel == 0:
- entry.tt += tt
- else:
- entry.recursivecallcount += 1
- entry.it += it
- entry.callcount += 1
+ self.previous.ll_subt += tt
+ entry._stop(tt, it)
if profobj.subcalls and self.previous:
- caller = self.previous.entry
- try:
- subentry = caller.calls[entry]
- except KeyError:
- pass
- else:
- subentry.recursionLevel -= 1
- if subentry.recursionLevel == 0:
- subentry.tt += tt
- else:
- subentry.recursivecallcount += 1
- subentry.it += it
- subentry.callcount += 1
+ caller = jit.hint(self.previous.entry, promote=True)
+ subentry = caller._get_or_make_subentry(entry, False)
+ if subentry is not None:
+ subentry._stop(tt, it)
def create_spec(space, w_arg):
if isinstance(w_arg, Method):
@@ -187,7 +214,7 @@
else:
class_name = space.type(w_arg).getname(space, '?')
return "{'%s' object}" % (class_name,)
-
+
def lsprof_call(space, w_self, frame, event, w_arg):
assert isinstance(w_self, W_Profiler)
if event == 'call':
@@ -209,6 +236,7 @@
pass
class W_Profiler(Wrappable):
+
def __init__(self, space, w_callable, time_unit, subcalls, builtins):
self.subcalls = subcalls
self.builtins = builtins
@@ -218,65 +246,94 @@
self.data = {}
self.builtin_data = {}
self.space = space
+ self.is_enabled = False
+ self.total_timestamp = r_longlong(0)
+ self.total_real_time = 0.0
- def timer(self):
+ def ll_timer(self):
if self.w_callable:
space = self.space
try:
- return space.float_w(space.call_function(self.w_callable))
+ if _is_64_bit:
+ return space.int_w(space.call_function(self.w_callable))
+ else:
+ return space.r_longlong_w(space.call_function(self.w_callable))
except OperationError, e:
e.write_unraisable(space, "timer function ",
self.w_callable)
- return 0.0
- return time.time()
+ return timer_size_int(0)
+ return read_timestamp()
def enable(self, space, w_subcalls=NoneNotWrapped,
w_builtins=NoneNotWrapped):
+ if self.is_enabled:
+ return # ignored
if w_subcalls is not None:
self.subcalls = space.bool_w(w_subcalls)
if w_builtins is not None:
self.builtins = space.bool_w(w_builtins)
+ # We want total_real_time and total_timestamp to end up containing
+ # (endtime - starttime). Now we are at the start, so we first
+ # have to subtract the current time.
+ self.is_enabled = True
+ self.total_real_time -= time.time()
+ self.total_timestamp -= read_timestamp()
# set profiler hook
+ c_setup_profiling()
space.getexecutioncontext().setllprofile(lsprof_call, space.wrap(self))
+ @jit.purefunction
+ def _get_or_make_entry(self, f_code, make=True):
+ try:
+ return self.data[f_code]
+ except KeyError:
+ if make:
+ entry = ProfilerEntry(f_code)
+ self.data[f_code] = entry
+ return entry
+ return None
+
+ @jit.purefunction
+ def _get_or_make_builtin_entry(self, key, make=True):
+ try:
+ return self.builtin_data[key]
+ except KeyError:
+ if make:
+ entry = ProfilerEntry(self.space.wrap(key))
+ self.builtin_data[key] = entry
+ return entry
+ return None
+
def _enter_call(self, f_code):
# we have a superb gc, no point in freelist :)
- try:
- entry = self.data[f_code]
- except KeyError:
- entry = ProfilerEntry(f_code)
- self.data[f_code] = entry
+ self = jit.hint(self, promote=True)
+ entry = self._get_or_make_entry(f_code)
self.current_context = ProfilerContext(self, entry)
def _enter_return(self, f_code):
context = self.current_context
if context is None:
return
- try:
- entry = self.data[f_code]
+ self = jit.hint(self, promote=True)
+ entry = self._get_or_make_entry(f_code, False)
+ if entry is not None:
context._stop(self, entry)
- except KeyError:
- pass
self.current_context = context.previous
def _enter_builtin_call(self, key):
- try:
- entry = self.builtin_data[key]
- except KeyError:
- entry = ProfilerEntry(self.space.wrap(key))
- self.builtin_data[key] = entry
- self.current_context = ProfilerContext(self, entry)
+ self = jit.hint(self, promote=True)
+ entry = self._get_or_make_builtin_entry(key)
+ self.current_context = ProfilerContext(self, entry)
def _enter_builtin_return(self, key):
context = self.current_context
if context is None:
return
- try:
- entry = self.builtin_data[key]
+ self = jit.hint(self, promote=True)
+ entry = self._get_or_make_builtin_entry(key, False)
+ if entry is not None:
context._stop(self, entry)
- except KeyError:
- pass
- self.current_context = context.previous
+ self.current_context = context.previous
def _flush_unmatched(self):
context = self.current_context
@@ -288,13 +345,29 @@
self.current_context = None
def disable(self, space):
+ if not self.is_enabled:
+ return # ignored
+ # We want total_real_time and total_timestamp to end up containing
+ # (endtime - starttime), or the sum of such intervals if
+ # enable() and disable() are called several times.
+ self.is_enabled = False
+ self.total_timestamp += read_timestamp()
+ self.total_real_time += time.time()
# unset profiler hook
space.getexecutioncontext().setllprofile(None, None)
+ c_teardown_profiling()
self._flush_unmatched()
def getstats(self, space):
if self.w_callable is None:
- factor = 1. # we measure time.time in floats
+ if self.is_enabled:
+ raise OperationError(space.w_RuntimeError,
+ space.wrap("Profiler instance must be disabled "
+ "before getting the stats"))
+ if self.total_timestamp:
+ factor = self.total_real_time / float(self.total_timestamp)
+ else:
+ factor = 1.0 # probably not used
elif self.time_unit > 0.0:
factor = self.time_unit
else:
diff --git a/pypy/translator/c/src/g_include.h b/pypy/translator/c/src/g_include.h
--- a/pypy/translator/c/src/g_include.h
+++ b/pypy/translator/c/src/g_include.h
@@ -39,11 +39,13 @@
#include "src/instrument.h"
#include "src/asm.h"
+#include "src/profiling.h"
+
+#include "src/debug_print.h"
/*** modules ***/
#ifdef HAVE_RTYPER /* only if we have an RTyper */
# include "src/rtyper.h"
-# include "src/debug_print.h"
# include "src/debug_traceback.h"
# include "src/debug_alloc.h"
#ifndef AVR
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -14,7 +14,7 @@
modname, _ = modname.split('.', 1)
if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
- '_socket', '_sre']:
+ '_socket', '_sre', '_lsprof']:
return True
return False
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -1023,6 +1023,10 @@
metainterp.history.record(rop.VIRTUAL_REF_FINISH,
[vrefbox, lastbox], None)
+ @arguments()
+ def opimpl_ll_read_timestamp(self):
+ return self.metainterp.execute_and_record(rop.READ_TIMESTAMP, None)
+
# ------------------------------
def setup_call(self, argboxes):
diff --git a/pypy/rpython/lltypesystem/opimpl.py b/pypy/rpython/lltypesystem/opimpl.py
--- a/pypy/rpython/lltypesystem/opimpl.py
+++ b/pypy/rpython/lltypesystem/opimpl.py
@@ -380,7 +380,7 @@
return ord(b)
def op_cast_int_to_unichar(b):
- assert type(b) is int
+ assert type(b) is int
return unichr(b)
def op_cast_int_to_uint(b):
@@ -578,6 +578,10 @@
def op_shrink_array(array, smallersize):
return False
+def op_ll_read_timestamp():
+ from pypy.rlib.rtimer import read_timestamp
+ return read_timestamp()
+
# ____________________________________________________________
def get_op_impl(opname):
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1356,6 +1356,19 @@
self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
c_nest, c_nest], 'void')
+ def test_read_timestamp(self):
+ if longlong.is_64_bit:
+ got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
+ got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
+ res1 = got1.getint()
+ res2 = got2.getint()
+ else:
+ got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
+ got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
+ res1 = got1.getlonglong()
+ res2 = got2.getlonglong()
+ assert res1 < res2 < res1 + 2**32
+
class LLtypeBackendTest(BaseBackendTest):
diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -32,7 +32,7 @@
assert isinstance(canraise, tuple)
assert not canraise or not canfold
-
+
# The operation manipulates PyObjects
self.pyobj = pyobj
@@ -440,6 +440,7 @@
'get_write_barrier_failing_case': LLOp(sideeffects=False),
'get_write_barrier_from_array_failing_case': LLOp(sideeffects=False),
'gc_get_type_info_group': LLOp(sideeffects=False),
+ 'll_read_timestamp': LLOp(canrun=True),
# __________ GC operations __________
@@ -482,7 +483,7 @@
'gc_typeids_z' : LLOp(),
# ------- JIT & GC interaction, only for some GCs ----------
-
+
'gc_adr_of_nursery_free' : LLOp(),
# ^^^ returns an address of nursery free pointer, for later modifications
'gc_adr_of_nursery_top' : LLOp(),
@@ -554,7 +555,8 @@
'debug_pdb': LLOp(),
'debug_assert': LLOp(tryfold=True),
'debug_fatalerror': LLOp(),
- 'debug_llinterpcall': LLOp(), # Python func call 'res=arg[0](*arg[1:])'
+ 'debug_llinterpcall': LLOp(canraise=(Exception,)),
+ # Python func call 'res=arg[0](*arg[1:])'
# in backends, abort() or whatever is fine
'debug_start_traceback': LLOp(),
'debug_record_traceback': LLOp(),
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -298,8 +298,11 @@
# Profile cases
if self.profilefunc is not None:
- if event not in ['leaveframe', 'call', 'c_call',
- 'c_return', 'c_exception']:
+ if not (event == 'leaveframe' or
+ event == 'call' or
+ event == 'c_call' or
+ event == 'c_return' or
+ event == 'c_exception'):
return False
last_exception = frame.last_exception
diff --git a/pypy/rlib/rtimer.py b/pypy/rlib/rtimer.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/rtimer.py
@@ -0,0 +1,37 @@
+import time
+
+from pypy.rlib.rarithmetic import r_longlong, r_ulonglong, r_uint
+from pypy.rlib.rarithmetic import intmask, longlongmask
+from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.rpython.lltypesystem import lltype, rffi
+
+_is_64_bit = r_uint.BITS > 32
+
+
+def read_timestamp():
+ # Returns a longlong on 32-bit, and a regular int on 64-bit.
+ # When running on top of python, build the result a bit arbitrarily.
+ x = long(time.time() * 500000000)
+ if _is_64_bit:
+ return intmask(x)
+ else:
+ return longlongmask(x)
+
+
+class ReadTimestampEntry(ExtRegistryEntry):
+ _about_ = read_timestamp
+
+ def compute_result_annotation(self):
+ from pypy.annotation.model import SomeInteger
+ if _is_64_bit:
+ return SomeInteger()
+ else:
+ return SomeInteger(knowntype=r_longlong)
+
+ def specialize_call(self, hop):
+ hop.exception_cannot_occur()
+ if _is_64_bit:
+ resulttype = lltype.Signed
+ else:
+ resulttype = rffi.LONGLONG
+ return hop.genop("ll_read_timestamp", [], resulttype=resulttype)
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -1,4 +1,5 @@
from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.rtimer import read_timestamp
from pypy.rlib.rarithmetic import intmask, LONG_BIT, r_uint, ovfcheck
from pypy.rlib.objectmodel import we_are_translated
from pypy.rlib.debug import debug_start, debug_stop
@@ -1205,6 +1206,10 @@
def bhimpl_unicodesetitem(cpu, unicode, index, newchr):
cpu.bh_unicodesetitem(unicode, index, newchr)
+ @arguments(returns=(longlong.is_64_bit and "i" or "f"))
+ def bhimpl_ll_read_timestamp():
+ return read_timestamp()
+
# ----------
# helpers to resume running in blackhole mode when a guard failed
@@ -1416,7 +1421,7 @@
current_exc = blackholeinterp._prepare_resume_from_failure(
resumedescr.guard_opnum, dont_change_position)
-
+
try:
_run_forever(blackholeinterp, current_exc)
finally:
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -23,7 +23,7 @@
# methods implemented by each concrete class
# ------------------------------------------
-
+
def getopnum(self):
raise NotImplementedError
@@ -234,7 +234,7 @@
def getarg(self, i):
raise IndexError
-
+
def setarg(self, i, box):
raise IndexError
@@ -258,7 +258,7 @@
return self._arg0
else:
raise IndexError
-
+
def setarg(self, i, box):
if i == 0:
self._arg0 = box
@@ -288,7 +288,7 @@
return self._arg1
else:
raise IndexError
-
+
def setarg(self, i, box):
if i == 0:
self._arg0 = box
@@ -326,7 +326,7 @@
return self._arg2
else:
raise IndexError
-
+
def setarg(self, i, box):
if i == 0:
self._arg0 = box
@@ -352,7 +352,7 @@
def getarg(self, i):
return self._args[i]
-
+
def setarg(self, i, box):
self._args[i] = box
@@ -460,6 +460,7 @@
'_MALLOC_LAST',
'FORCE_TOKEN/0',
'VIRTUAL_REF/2', # removed before it's passed to the backend
+ 'READ_TIMESTAMP/0',
'_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations -----
'SETARRAYITEM_GC/3d',
@@ -468,7 +469,7 @@
'SETFIELD_RAW/2d',
'STRSETITEM/3',
'UNICODESETITEM/3',
- #'RUNTIMENEW/1', # ootype operation
+ #'RUNTIMENEW/1', # ootype operation
'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
'DEBUG_MERGE_POINT/2', # debugging only
'JIT_DEBUG/*', # debugging only
@@ -554,7 +555,7 @@
2: BinaryOp,
3: TernaryOp
}
-
+
is_guard = name.startswith('GUARD')
if is_guard:
assert withdescr
diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -521,6 +521,7 @@
ll_dict_insertclean(d, entry.key, entry.value, hash)
i += 1
old_entries.delete()
+ll_dict_resize.oopspec = 'dict.resize(d)'
# ------- a port of CPython's dictobject.c's lookdict implementation -------
PERTURB_SHIFT = 5
diff --git a/pypy/module/_lsprof/test/test_cprofile.py b/pypy/module/_lsprof/test/test_cprofile.py
--- a/pypy/module/_lsprof/test/test_cprofile.py
+++ b/pypy/module/_lsprof/test/test_cprofile.py
@@ -91,6 +91,30 @@
assert spam2bar.inlinetime == 1.0
assert spam2bar.totaltime == 1.0
+ def test_scale_of_result(self):
+ import _lsprof, time
+ prof = _lsprof.Profiler()
+ def foo(n):
+ t = time.time()
+ while abs(t - time.time()) < 1.0:
+ pass # busy-wait for 1 second
+ def bar(n):
+ foo(n)
+ prof.enable()
+ bar(0)
+ prof.disable()
+ stats = prof.getstats()
+ entries = {}
+ for entry in stats:
+ entries[entry.code] = entry
+ efoo = entries[foo.func_code]
+ ebar = entries[bar.func_code]
+ assert 0.9 < efoo.totaltime < 2.9
+ assert 0.9 < efoo.inlinetime < 2.9
+ for subentry in ebar.calls:
+ assert 0.9 < subentry.totaltime < 2.9
+ assert 0.9 < subentry.inlinetime < 2.9
+
def test_cprofile(self):
import sys, os
# XXX this is evil trickery to walk around the fact that we don't
diff --git a/pypy/translator/c/src/profiling.h b/pypy/translator/c/src/profiling.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/profiling.h
@@ -0,0 +1,12 @@
+
+#ifndef PROFILING_H
+#define PROFILING_H
+
+/* Restrict the calling thread to a single CPU where the platform supports
+   it (implemented in profiling.c; a no-op elsewhere). */
+void pypy_setup_profiling(void);
+
+/* Undo the effect of pypy_setup_profiling(), if it is currently active. */
+void pypy_teardown_profiling(void);
+
+#endif
diff --git a/pypy/jit/tl/pypyjit.py b/pypy/jit/tl/pypyjit.py
--- a/pypy/jit/tl/pypyjit.py
+++ b/pypy/jit/tl/pypyjit.py
@@ -39,6 +39,7 @@
config.objspace.usemodules.array = True
config.objspace.usemodules._weakref = True
config.objspace.usemodules._sre = False
+config.objspace.usemodules._lsprof = True
#
config.objspace.usemodules._ffi = True
#
@@ -99,7 +100,7 @@
from pypy.translator.goal.ann_override import PyPyAnnotatorPolicy
from pypy.rpython.test.test_llinterp import get_interpreter
- # first annotate, rtype, and backendoptimize PyPy
+ # first annotate and rtype
try:
interp, graph = get_interpreter(entry_point, [], backendopt=False,
config=config,
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -330,7 +330,7 @@
if log:
self._register_counter()
operations = self._inject_debugging_code(looptoken, operations)
-
+
regalloc = RegAlloc(self, self.cpu.translate_support_code)
arglocs = regalloc.prepare_loop(inputargs, operations, looptoken)
looptoken._x86_arglocs = arglocs
@@ -339,7 +339,7 @@
stackadjustpos = self._assemble_bootstrap_code(inputargs, arglocs)
self.looppos = self.mc.get_relative_pos()
looptoken._x86_frame_depth = -1 # temporarily
- looptoken._x86_param_depth = -1 # temporarily
+ looptoken._x86_param_depth = -1 # temporarily
frame_depth, param_depth = self._assemble(regalloc, operations)
looptoken._x86_frame_depth = frame_depth
looptoken._x86_param_depth = param_depth
@@ -538,7 +538,7 @@
def _assemble(self, regalloc, operations):
self._regalloc = regalloc
- regalloc.walk_operations(operations)
+ regalloc.walk_operations(operations)
if we_are_translated() or self.cpu.dont_keepalive_stuff:
self._regalloc = None # else keep it around for debugging
frame_depth = regalloc.fm.frame_depth
@@ -1015,7 +1015,7 @@
dst_locs.append(unused_gpr.pop())
else:
pass_on_stack.append(loc)
-
+
# Emit instructions to pass the stack arguments
# XXX: Would be nice to let remap_frame_layout take care of this, but
# we'd need to create something like StackLoc, but relative to esp,
@@ -1441,6 +1441,17 @@
else:
assert 0, itemsize
+ def genop_read_timestamp(self, op, arglocs, resloc):
+ self.mc.RDTSC()
+ if longlong.is_64_bit:
+ self.mc.SHL_ri(edx.value, 32)
+ self.mc.OR_rr(edx.value, eax.value)
+ else:
+ loc1, = arglocs
+ self.mc.MOVD_xr(loc1.value, edx.value)
+ self.mc.MOVD_xr(resloc.value, eax.value)
+ self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
+
def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2):
loc = locs[0]
self.mc.TEST(loc, loc)
@@ -2131,7 +2142,7 @@
assert rx86.fits_in_32bits(tid)
self.mc.MOV_mi((eax.value, 0), tid)
self.mc.MOV(heap(nursery_free_adr), edx)
-
+
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
genop_list = [Assembler386.not_implemented_op] * rop._LAST
genop_llong_list = {}
@@ -2142,7 +2153,7 @@
opname = name[len('genop_discard_'):]
num = getattr(rop, opname.upper())
genop_discard_list[num] = value
- elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
+ elif name.startswith('genop_guard_') and name != 'genop_guard_exception':
opname = name[len('genop_guard_'):]
num = getattr(rop, opname.upper())
genop_guard_list[num] = value
diff --git a/pypy/translator/c/src/align.h b/pypy/translator/c/src/align.h
--- a/pypy/translator/c/src/align.h
+++ b/pypy/translator/c/src/align.h
@@ -1,3 +1,6 @@
+
+#ifndef _PYPY_ALIGN_H
+#define _PYPY_ALIGN_H
/* alignment for arena-based garbage collectors: the following line
enforces an alignment that should be enough for any structure
@@ -14,3 +17,5 @@
#define ROUND_UP_FOR_ALLOCATION(x, minsize) \
((((x)>=(minsize)?(x):(minsize)) \
+ (MEMORY_ALIGNMENT-1)) & ~(MEMORY_ALIGNMENT-1))
+
+#endif //_PYPY_ALIGN_H
diff --git a/pypy/translator/c/src/profiling.c b/pypy/translator/c/src/profiling.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/profiling.c
@@ -0,0 +1,52 @@
+
+/* _GNU_SOURCE must be defined before any system header is included,
+   otherwise <sched.h> does not declare cpu_set_t and the CPU_* macros. */
+#if defined(__GNUC__) && defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include <stddef.h>
+#if defined(__GNUC__) && defined(__linux__)
+
+#include <sched.h>
+
+/* CPU affinity mask that was in effect before pypy_setup_profiling(). */
+cpu_set_t base_cpu_set;
+/* Non-zero while the affinity set by pypy_setup_profiling() is active. */
+int profiling_setup = 0;
+
+/* Restrict the calling thread to CPU 0, saving the previous affinity mask
+   so that pypy_teardown_profiling() can restore it.  Does nothing if the
+   restriction is already active or if the current mask cannot be saved. */
+void pypy_setup_profiling(void)
+{
+    cpu_set_t set;
+
+    if (profiling_setup) {
+        return;
+    }
+    if (sched_getaffinity(0, sizeof(cpu_set_t), &base_cpu_set) != 0) {
+        /* without a saved mask there is nothing valid to restore later */
+        return;
+    }
+    CPU_ZERO(&set);
+    CPU_SET(0, &set);   /* restrict to a single cpu */
+    if (sched_setaffinity(0, sizeof(cpu_set_t), &set) == 0) {
+        profiling_setup = 1;
+    }
+}
+
+/* Restore the affinity mask saved by pypy_setup_profiling(), if any. */
+void pypy_teardown_profiling(void)
+{
+    if (profiling_setup) {
+        sched_setaffinity(0, sizeof(cpu_set_t), &base_cpu_set);
+        profiling_setup = 0;
+    }
+}
+#else
+/* No CPU-affinity support on this platform: both hooks are no-ops. */
+void pypy_setup_profiling(void) { }
+void pypy_teardown_profiling(void) { }
+#endif
More information about the Pypy-commit
mailing list