[pypy-svn] r59934 - pypy/build/benchmem

xoraxax at codespeak.net
Sat Nov 15 22:47:57 CET 2008


Author: xoraxax
Date: Sat Nov 15 22:47:56 2008
New Revision: 59934

Modified:
   pypy/build/benchmem/report.py
   pypy/build/benchmem/runbench.py
Log:
Enhanced the textual appprofiles report and changed the pauses threshold to a fixed value.
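
The report side of this boils down to a running min/max aggregation per
executable: while one row per (executable, benchmark) pair is emitted,
each statistic is folded into a dict keyed by (executable, "min"/"max"),
and the accumulated extremes are appended as summary rows at the end.
A minimal, self-contained sketch of that pattern (the Result tuple and
the sample numbers are illustrative stand-ins, not the benchmem API):

    import collections
    Result = collections.namedtuple("Result", "executable min avg max")
    results = [Result("pypy-c", 10, 12.5, 20),
               Result("pypy-c", 8, 11.0, 25),
               Result("python2.5", 5, 6.5, 9)]

    stats = {}
    for result in results:
        for attr in ("min", "avg", "max"):
            value = getattr(result, attr)
            # fold the value into both the running minimum and the maximum
            for kind, default in ((min, 2**31), (max, 0)):
                valuedict = stats.setdefault((result.executable, kind.__name__), {})
                valuedict[attr] = kind(valuedict.get(attr, default), value)

    # stats[("pypy-c", "max")]["avg"] == 12.5, the largest per-run average
    # seen for pypy-c across all benchmarks.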

Modified: pypy/build/benchmem/report.py
==============================================================================
--- pypy/build/benchmem/report.py	(original)
+++ pypy/build/benchmem/report.py	Sat Nov 15 22:47:56 2008
@@ -351,7 +351,12 @@
         self.resultset = resultset.filter(benchtype="appprofiles")
 
     def getexecutables(self):
-        return [r.executable for r in self.resultset.results]
+        l = []
+        for r in self.resultset.results:
+            executable = r.executable
+            if executable not in l:
+                l.append(executable)
+        return l
 
     def run(self):
         if not self.resultset:
@@ -360,18 +365,53 @@
         tw.sep("=", "Appprofiles memory sampling")
         # result.mintimings -> [(name, timings_dict)]
         executables = self.getexecutables()
-        row0 = "run duration #snapshots min avg max".split()
+        row0 = "run duration #snapshots heapdata_min heapdata_avg heapdata_max dirtyd_min dirtyd_avg dirtyd_max min_data avg_data max_data code_min code_avg code_max".split()
         rows = [row0]
-        names = [result.benchname for result in self.resultset.results]
+        names = []
+        for result in self.resultset.results:
+            if result.benchname not in names:
+                names.append(result.benchname)
+        stats = {}
+        attrs = ("min", "avg", "max",
+                 "min_dirtied_data", "avg_dirtied_data", "max_dirtied_data",
+                 "min_data", "avg_data", "max_data",
+                 "min_code", "avg_code", "max_code",
+                 )
         for name in names:
             for result in self.resultset.results:
                 if name == result.benchname:
                     timestamps = [float(ss.timestamp) for ss in result.snapshots]
                     min_ts, max_ts = min(timestamps), max(timestamps)
+                    row = ["%s-%s" %(result.executable, name),
+                           "%.2f" % (max_ts - min_ts, ), len(result.snapshots)]
+
+                    for attr in attrs:
+                        data = getattr(result, attr)
+                        for kind, default in ((min, 2**31), (max, 0)):
+                            key = (result.executable, kind.__name__)
+                            valuedict = stats.setdefault(key, {})
+                            old = valuedict.get(attr, default)
+                            valuedict[attr] = kind(old, data)
+                        data_str = str(data)
+                        if "avg" in attr:
+                            data_str = "%.2f" % data
+
+                        row.append(data_str)
+
+                    rows.append(row)
+        for executable in executables:
+            for kind in (min, max):
+                kindname = kind.__name__
+                key = (executable, kindname)
+                row = ["%s-%s" % (executable, kindname), "-", "-"]
+                for attr in attrs:
+                    data = stats[key][attr]
+                    data_str = str(data)
+                    if "avg" in attr:
+                        data_str = "%.2f" % data
+                    row.append(data_str)
+                rows.append(row)
 
-                    rows.append(["%s-%s" %(result.executable, name),
-                       "%.2f" % (max_ts - min_ts, ), len(result.snapshots),
-                       result.min, "%.2f" % result.avg, result.max])
         tw.line(asciitable(rows))
 
 
@@ -393,10 +433,12 @@
         for result in self.resultset.results:
             cell0 = "%s-%s" % (result.executable_short, result.benchname)
             samples = result.lst
-            base_sample = min(samples)
             # get all samples that are larger than FACTOR * smallest_sample
-            FACTOR = 1000
-            threshold = FACTOR * base_sample
+            # base_sample = min(samples)
+            # FACTOR = 1000
+            # threshold = FACTOR * base_sample
+            # get all samples that are larger than threshold
+            threshold = 0.050
             median = samples[len(samples) / 2]
             pauses = [sample for sample in samples if sample > threshold]
             pauses.sort()

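A note on the threshold change in the last hunk above: the old cutoff of
FACTOR * min(samples) (with FACTOR = 1000) made the set of reported
pauses depend on the fastest sample of each run, so pause counts were
presumably not comparable across interpreters; a fixed cutoff avoids
that. Condensed, the new logic is as follows (0.050 is the value from
the patch; its unit, presumably seconds, is not stated there):

    THRESHOLD = 0.050  # fixed cutoff from the patch, unit assumed to be seconds

    def find_pauses(samples):
        # keep all samples above the fixed threshold, smallest first
        pauses = [sample for sample in samples if sample > THRESHOLD]
        pauses.sort()
        return pauses

    print(find_pauses([0.001, 0.002, 0.3, 0.002, 0.12]))  # -> [0.12, 0.3]
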
Modified: pypy/build/benchmem/runbench.py
==============================================================================
--- pypy/build/benchmem/runbench.py	(original)
+++ pypy/build/benchmem/runbench.py	Sat Nov 15 22:47:56 2008
@@ -498,10 +498,29 @@
 
     def __init__(self, snapshots, *args, **kwargs):
         CommonResult.__init__(self, snapshots, *args, **kwargs)
-        self.min = min(ss.heap_and_data(self) for ss in snapshots)
-        self.max = max(ss.heap_and_data(self) for ss in snapshots)
+        avg = lambda x: sum(x) / float(len(x))
+
+        heap_values = [ss.heap_and_data(self) for ss in snapshots]
+        self.min = min(heap_values)
+        self.max = max(heap_values)
         # XXX this way we are not integrating over time so we have skew again
-        self.avg = sum(ss.heap_and_data(self) for ss in snapshots) / float(len(snapshots))
+        self.avg = avg(heap_values)
+
+        code_values = [ss.filter(group=self.executable, kind=Mappings.CODE).rss
+                for ss in snapshots]
+        dirty_data_values = [ss.filter(group=self.executable, kind=Mappings.DATA).dirty
+                for ss in snapshots]
+        data_values = [ss.filter(group=self.executable, kind=Mappings.DATA).rss
+                for ss in snapshots]
+        self.max_code = max(code_values)
+        self.min_code = min(code_values)
+        self.avg_code = avg(code_values)
+        self.max_dirtied_data = max(dirty_data_values)
+        self.min_dirtied_data = min(dirty_data_values)
+        self.avg_dirtied_data = avg(dirty_data_values)
+        self.min_data = min(data_values)
+        self.avg_data = avg(data_values)
+        self.max_data = max(data_values)
 
 
 class BasesizeResult(Result):
@@ -577,6 +596,8 @@
             setattr(self, name, sum([getattr(x, name) for x in mappings]))
         self.private = self.private_dirty + self.private_clean
         self.shared = self.shared_dirty + self.shared_clean
+        self.dirty = self.private_dirty + self.shared_dirty
+        self.clean = self.private_clean + self.shared_clean
 
     def filter(self, group=None, kind=None, inv=False):
         new_mappings = []



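On the runbench.py side, each appprofiles result now records min/avg/max
not only of heap-plus-data, but also of the code RSS, the data RSS and
the dirtied data of the benchmarked executable; summed mappings
additionally gain dirty/clean totals (private_dirty + shared_dirty and
private_clean + shared_clean, mirroring the existing private/shared
sums). A condensed sketch of the per-kind computation, assuming snapshot
objects whose filter() returns summed mappings as in runbench.py (the
classes below are stand-ins, not the real ones):

    class FakeMappings(object):
        """Stand-in for the summed mappings object that filter() returns."""
        def __init__(self, rss, dirty):
            self.rss, self.dirty = rss, dirty

    class FakeSnapshot(object):
        def __init__(self, by_kind):
            self._by_kind = by_kind
        def filter(self, group=None, kind=None):
            return self._by_kind[kind]

    def kind_stats(snapshots, executable, kind, field="rss"):
        # min/avg/max of one mapping kind over all snapshots of a run
        values = [getattr(ss.filter(group=executable, kind=kind), field)
                  for ss in snapshots]
        return min(values), sum(values) / float(len(values)), max(values)

    snapshots = [FakeSnapshot({"CODE": FakeMappings(100, 4)}),
                 FakeSnapshot({"CODE": FakeMappings(140, 8)})]
    print(kind_stats(snapshots, "pypy-c", "CODE"))  # -> (100, 120.0, 140)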