[pypy-svn] r36679 - in pypy/dist/pypy/translator/benchmark: . test

mwh at codespeak.net
Sat Jan 13 17:59:27 CET 2007


Author: mwh
Date: Sat Jan 13 17:59:18 2007
New Revision: 36679

Modified:
   pypy/dist/pypy/translator/benchmark/bench-custom.py
   pypy/dist/pypy/translator/benchmark/benchmarks.py
   pypy/dist/pypy/translator/benchmark/result.py
   pypy/dist/pypy/translator/benchmark/test/test_result.py
Log:
More benchmarking stuff.  Some horrible, horrible code in places, but it mostly
seems to work.  bench-custom.py is much smaller now; have to be happy about that.


Modified: pypy/dist/pypy/translator/benchmark/bench-custom.py
==============================================================================
--- pypy/dist/pypy/translator/benchmark/bench-custom.py	(original)
+++ pypy/dist/pypy/translator/benchmark/bench-custom.py	Sat Jan 13 17:59:18 2007
@@ -1,65 +1,67 @@
 # benchmarks on a unix machine.
 
 import autopath
-from pypy.translator.benchmark.result import BenchmarkResult
+from pypy.translator.benchmark.result import BenchmarkResultSet
 from pypy.translator.benchmark.benchmarks import BENCHMARKS
-import os, sys, time, pickle, re
+import os, sys, time, pickle, re, py
 
 def get_executables(args):  # sorted by mtime (oldest, i.e. lowest-revision, first)
-    return sorted(args, key=os.path.getmtime)
+    exes = sorted(args, key=os.path.getmtime)
+    r = []
+    for exe in exes:
+        if '/' not in exe:
+            r.append('./' + exe)
+        else:
+            r.append(exe)
+    return r
 
 def main(options, args):
-    benchmark_result = BenchmarkResult('bench-custom.benchmark_result')
+    if os.path.exists(options.picklefile):
+        benchmark_result = pickle.load(open(options.picklefile, 'rb'))
+    else:
+        benchmark_result = BenchmarkResultSet()
 
-    benchmarks = [b for b in BENCHMARKS if b[0] in options.benchmarks]
+    benchmarks = [b for b in BENCHMARKS if b.name in options.benchmarks]
 
     exes = get_executables(args)
-    pythons = 'python2.4 python2.3'.split()
-    width = max(map(len, exes+pythons+['executable'])) + 3
+    pythons = 'python2.5 python2.4 python2.3'.split()
+    full_pythons = []
+    for python in pythons:
+        full_python = py.path.local.sysfind(python)
+        if full_python:
+            full_pythons.append(str(full_python))
 
-    print 'date                           size codesize    %-*s'%(width, 'executable'),
-    for name, run, ascgood, units in benchmarks:
-        print '    %-*s'%(6+len(units)+2+8+2-4, name),
-    print
     sys.stdout.flush()
 
     refs = {}
 
-    for exe in pythons+exes:
-        exe_ = exe
-        if exe in pythons:
-            size = codesize = '-'
-            ctime = time.ctime()
-        else:
-            size = os.path.getsize(exe)
-            codesize = os.popen('size "%s" | tail -n1 | cut -f1'%(exe,)).read().strip()
-            ctime = time.ctime(os.path.getmtime(exe))
-            if '/' not in exe:
-                exe_ = './' + exe
-        print '%-26s %8s %8s    %-*s'%(ctime, size, codesize, width, exe),
-        sys.stdout.flush()
-        for name, run, ascgood, units in benchmarks:
-            n = exe + '_' + name
-            if not benchmark_result.is_stable(n):
-                benchmark_result.update(n, run(exe_), ascgood)
-            res = benchmark_result.get_best_result(n)
-            if name not in refs:
-                refs[name] = res
-            factor = res/refs[name]
-            if ascgood:
-                factor = 1/factor
-            print "%6d%s (%6.1fx)"%(res, units, factor),
-            sys.stdout.flush()
-        print
+    exes = full_pythons+exes
 
-        sys.stdout.flush()
+    for i in range(int(options.runcount)):
+        for exe in exes:   # full_pythons were already folded into exes above
+            for b in benchmarks:
+                benchmark_result.result(exe).run_benchmark(b, verbose=True)
+
+    stats = ['stat:st_mtime', 'exe_name', 'bench:richards', 'pypy_rev', 'bench:pystone']
+    for row in benchmark_result.txt_summary(stats,
+                                            relto=full_pythons[0],
+                                            filteron=lambda r: r.exe_name in exes):
+        print row
 
 if __name__ == '__main__':
     from optparse import OptionParser
     parser = OptionParser()
     parser.add_option(
         '--benchmarks', dest='benchmarks',
-        default=','.join([b[0] for b in BENCHMARKS])
+        default=','.join([b.name for b in BENCHMARKS])
+        )
+    parser.add_option(
+        '--pickle', dest='picklefile',
+        default='bench-custom.benchmark_result'
+        )
+    parser.add_option(
+        '--runcount', dest='runcount',
+        default='1',
         )
     options, args = parser.parse_args(sys.argv[1:])
     main(options, args)
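
Note that, as committed, main() loads the picklefile named by --pickle if it
exists but never writes results back; persistence is only exercised by the
__main__ block in result.py below.  A minimal sketch of the full round-trip
using the names from this diff (the executable path is hypothetical):

    import os, pickle
    from pypy.translator.benchmark.result import BenchmarkResultSet
    from pypy.translator.benchmark.benchmarks import BENCHMARKS_BY_NAME

    picklefile = 'bench-custom.benchmark_result'  # the --pickle default above
    if os.path.exists(picklefile):
        benchmark_result = pickle.load(open(picklefile, 'rb'))
    else:
        benchmark_result = BenchmarkResultSet()
    exe = './pypy-c'                              # hypothetical executable
    benchmark_result.result(exe).run_benchmark(
        BENCHMARKS_BY_NAME['richards'], verbose=True)
    # write the accumulated results back for the next run
    pickle.dump(benchmark_result, open(picklefile, 'wb'))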

Modified: pypy/dist/pypy/translator/benchmark/benchmarks.py
==============================================================================
--- pypy/dist/pypy/translator/benchmark/benchmarks.py	(original)
+++ pypy/dist/pypy/translator/benchmark/benchmarks.py	Sat Jan 13 17:59:18 2007
@@ -18,6 +18,13 @@
         return 99999.0
     return float(line.split()[len(pattern.split())])
 
+class Benchmark(object):
+    def __init__(self, name, runner, asc_good, units):
+        self.name = name
+        self.run = runner
+        self.asc_good = asc_good
+        self.units = units
+
 def run_cmd(cmd):
     #print "running", cmd
     pipe = os.popen(cmd + ' 2>&1')
@@ -50,7 +57,11 @@
        return 99999.0
     return 1000*(float(m.group('mins'))*60 + float(m.group('secs')))
 
-BENCHMARKS = [('richards', run_richards, RICHARDS_ASCENDING_GOOD, 'ms'),
-              ('pystone', run_pystone, PYSTONE_ASCENDING_GOOD, ''),
-              ('translate', run_translate, RICHARDS_ASCENDING_GOOD, 'ms'),
+BENCHMARKS = [Benchmark('richards', run_richards, RICHARDS_ASCENDING_GOOD, 'ms'),
+              Benchmark('pystone', run_pystone, PYSTONE_ASCENDING_GOOD, ''),
+              Benchmark('translate', run_translate, RICHARDS_ASCENDING_GOOD, 'ms'),
              ]
+
+BENCHMARKS_BY_NAME = {}
+for _b in BENCHMARKS:
+    BENCHMARKS_BY_NAME[_b.name] = _b
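
A minimal sketch of how the new Benchmark objects and the BENCHMARKS_BY_NAME
registry are meant to be used (the executable path is hypothetical):

    from pypy.translator.benchmark.benchmarks import BENCHMARKS_BY_NAME

    bench = BENCHMARKS_BY_NAME['richards']
    print bench.name, bench.units       # -> richards ms
    # bench.run takes the path of the executable to time and returns a float;
    # bench.asc_good says whether larger numbers count as better.
    t = bench.run('./pypy-c')           # hypothetical executable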

Modified: pypy/dist/pypy/translator/benchmark/result.py
==============================================================================
--- pypy/dist/pypy/translator/benchmark/result.py	(original)
+++ pypy/dist/pypy/translator/benchmark/result.py	Sat Jan 13 17:59:18 2007
@@ -1,21 +1,140 @@
-import os, pickle
+import os, pickle, sys, time, re
+
+stat2title = {
+    'stat:st_mtime':  "date",
+    'exe_name':       "executable",
+    'bench:richards': "richards",
+    'bench:pystone':  "pystone",
+}
+
+class BenchmarkResultSet(object):
+    def __init__(self, max_results=10):
+        self.benchmarks = {}
+        self.max_results = max_results
+
+    def result(self, exe):
+        if exe in self.benchmarks:
+            return self.benchmarks[exe]
+        else:
+            r = self.benchmarks[exe] = BenchmarkResult(exe, self.max_results)
+            return r
+
+    def txt_summary(self, stats, **kw):
+        sortkey = kw.get('sortby', 'stat:st_mtime')
+        lst = self.benchmarks.values()
+        lst.sort(key=lambda x:x.getstat(sortkey, None), reverse=kw.get('reverse', False))
+        if 'filteron' in kw:
+            filteron = kw['filteron']
+            lst = [r for r in lst if filteron(r)]
+        relto = kw.get('relto', None)
+        table = [[(stat2title.get(s,s),0) for s in stats]]
+        for r in lst:
+            row = []
+            for stat in stats:
+                if stat.startswith('bench:'):
+                    benchname = stat[6:]
+                    if r.getstat(stat, None) is None:
+                        row.append(('XXX',-1))
+                    elif relto:
+                        factor = self.result(relto).getstat(stat)/r.getstat(stat)
+                        if not r.asc_goods[benchname]:
+                            factor = 1/factor
+                        s, f = r.fmtstat(stat)
+                        row.append((s + ' (%6.2fx)'%factor, f))
+                    else:
+                        row.append(r.fmtstat(stat))
+                else:
+                    row.append(r.fmtstat(stat))
+            table.append(row)
+        widths = [0 for thing in stats]
+        for row in table:
+            for i, cell in enumerate(row):
+                widths[i] = max(len(cell[0]), widths[i])
+        concretetable = []
+        concreterow = []
+        for w, cell in zip(widths, table[0]):
+            concreterow.append(cell[0].center(w))
+        concretetable.append(' '.join(concreterow))
+        for row in table[1:]:
+            concreterow = []
+            for w, cell in zip(widths, row):
+                concreterow.append("%*s"%(cell[1]*w, cell[0]))
+            concretetable.append(' '.join(concreterow))
+        return concretetable
 
 class BenchmarkResult(object):
 
-    def __init__(self, filename, max_results=10):
-        self.filename    = filename
+    def __init__(self, exe, max_results=10):
         self.max_results = max_results
-        if os.path.exists(filename):
-            f = open(filename, 'r')
-            self.n_results   = pickle.load(f)
-            self.best_result = pickle.load(f)
-            f.close()
-            # any exception while loading the file is best reported
-            # as a crash, instead of as a silent loss of all the
-            # data :-/
+        self.exe_stat = os.stat(exe)
+        self.exe_name = exe
+        self.codesize = os.popen('size "%s" | tail -n1 | cut -f1'%(exe,)).read().strip()
+        try:
+            self.pypy_rev = int(os.popen(
+                exe + ' -c "import sys; print sys.pypy_version_info[-1]" 2>/dev/null').read().strip())
+        except ValueError:
+            self.pypy_rev = -1
+        self.best_benchmarks = {}
+        self.benchmarks = {}
+        self.asc_goods = {}
+        self.run_counts = {}
+
+    def run_benchmark(self, benchmark, verbose=False):
+        self.asc_goods[benchmark.name] = benchmark.asc_good
+        if self.run_counts.get(benchmark.name, 0) >= self.max_results:
+            return
+        if verbose:
+            print 'running', benchmark.name, 'for', self.exe_name
+        new_result = benchmark.run(self.exe_name)
+        self.benchmarks.setdefault(benchmark.name, []).append(new_result)
+        if benchmark.name in self.best_benchmarks:
+            old_result = self.best_benchmarks[benchmark.name]
+            if benchmark.asc_good:
+                new_result = max(new_result, old_result)
+            else:
+                new_result = min(new_result, old_result)
+        self.best_benchmarks[benchmark.name] = new_result
+        self.run_counts[benchmark.name] = self.run_counts.get(benchmark.name, 0) + 1
+
+    def getstat(self, *args):
+        # oh for supplied-p!
+        return_default = False
+        if len(args) == 1:
+            stat, = args
+        else:
+            stat, default = args
+            return_default = True
+        if hasattr(self, stat):
+            return getattr(self, stat)
+        statkind, statdetail = stat.split(':')
+        if statkind == 'stat':
+            return getattr(self.exe_stat, statdetail)
+        elif statkind == 'bench':
+            if return_default:
+                return self.best_benchmarks.get(statdetail, default)
+            else:
+                return self.best_benchmarks[statdetail]
+        else:
+            raise ValueError('unknown stat kind: %r' % (statkind,))
+
+    def fmtstat(self, *args):
+        stat = args[0]
+        statvalue = self.getstat(*args)
+        if stat == 'stat:st_mtime':
+            return time.ctime(statvalue), -1
+        elif stat == 'exe_name':
+            return os.path.basename(statvalue), -1
+        elif stat == 'bench:richards':
+            return "%8.2f%s"%(statvalue, 'ms'), 1
+        elif stat == 'bench:pystone':
+            return "%8.2f"%(statvalue,), 1
+        elif stat == 'pypy_rev':
+            return str(statvalue), 1
         else:
-            self.n_results   = {}
-            self.best_result = {}
+            return str(statvalue), -1
+
+    def summary(self, stats):
+        return [self.getstat(stat) for stat in stats]
 
     def is_stable(self, name):
         try:
@@ -23,22 +142,20 @@
         except:
             return False
 
-    def update(self, name, result, ascending_good):
-        try:
-            if ascending_good:
-                self.best_result[name] = max(self.best_result[name], result)
-            else:
-                self.best_result[name] = min(self.best_result[name], result)
-        except KeyError:
-            self.n_results[name] = 0
-            self.best_result[name] = result
-        self.n_results[name] += 1
-
-        f = open(self.filename, 'w')
-        pickle.dump(self.n_results  , f)
-        pickle.dump(self.best_result, f)
-        f.close()
-
-    def get_best_result(self, name):
-        return self.best_result[name]
-
+if __name__ == '__main__':
+    import autopath
+    from pypy.translator.benchmark import benchmarks, result
+    import cPickle
+    if os.path.exists('foo.pickle'):
+        s = cPickle.load(open('foo.pickle', 'rb'))
+    else:
+        s = result.BenchmarkResultSet(4)
+    for exe in sys.argv[1:]:
+        r = s.result(exe)
+        r.run_benchmark(benchmarks.BENCHMARKS_BY_NAME['richards'])
+        r.run_benchmark(benchmarks.BENCHMARKS_BY_NAME['pystone'])
+    cPickle.dump(s, open('foo.pickle', 'wb'))
+    stats = ['stat:st_mtime', 'exe_name', 'bench:richards', 'bench:pystone']
+
+    for row in s.txt_summary(stats, sortby="exe_name", reverse=True, relto="/usr/local/bin/python2.4"):
+        print row
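
getstat accepts an optional default, mirroring dict.get (hence the "oh for
supplied-p!" comment above).  A sketch of the three lookup kinds, assuming
the current interpreter as the measured executable:

    import sys
    from pypy.translator.benchmark.result import BenchmarkResultSet

    s = BenchmarkResultSet()
    r = s.result(sys.executable)
    print r.getstat('exe_name')              # plain attribute lookup
    print r.getstat('stat:st_mtime')         # forwarded to the os.stat() result
    print r.getstat('bench:richards', None)  # None until the benchmark has run
    # r.getstat('bench:richards') without a default raises KeyError here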

Modified: pypy/dist/pypy/translator/benchmark/test/test_result.py
==============================================================================
--- pypy/dist/pypy/translator/benchmark/test/test_result.py	(original)
+++ pypy/dist/pypy/translator/benchmark/test/test_result.py	Sat Jan 13 17:59:18 2007
@@ -1,11 +1,13 @@
 import py
 from pypy.translator.benchmark import result
 
+py.test.skip("not doing TDD for this :/")
+
 temp = py.test.ensuretemp("report")
 
 def test_simple():
     fname = temp.join("simple")
-    b = result.BenchmarkResult(str(fname), 3)
+    b = result.BenchmarkResult()
 
     b.update('foo', 1, True)
     assert b.get_best_result('foo') == 1
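
Since the old-API test above is now skipped, here is a hedged sketch, not part
of the commit, of what a test against the new interface in result.py might
look like:

    import sys
    from pypy.translator.benchmark import result

    def test_result_set_caches_per_exe():
        s = result.BenchmarkResultSet(max_results=3)
        r = s.result(sys.executable)
        assert r.exe_name == sys.executable
        # result() hands back the same BenchmarkResult for a repeated executable
        assert s.result(sys.executable) is r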


