[pypy-commit] extradoc extradoc: commit

cfbolz noreply at buildbot.pypy.org
Thu Jul 26 11:14:18 CEST 2012


Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: extradoc
Changeset: r4371:7706cb52355c
Date: 2012-07-26 11:13 +0200
http://bitbucket.org/pypy/extradoc/changeset/7706cb52355c/

Log:	commit

diff --git a/talk/vmil2012/Makefile b/talk/vmil2012/Makefile
--- a/talk/vmil2012/Makefile
+++ b/talk/vmil2012/Makefile
@@ -1,5 +1,5 @@
 
-jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex
+jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex figures/backend_table.tex
 	pdflatex paper
 	bibtex paper
 	pdflatex paper
@@ -18,12 +18,18 @@
 %.tex: %.py
 	pygmentize -l python -o $@ $<
 
-figures/benchmarks_table.tex: tool/build_tables.py logs/summary.csv tool/table_template.tex
+figures/%_table.tex: tool/build_tables.py logs/backend_summary.csv logs/summary.csv tool/table_template.tex
 	tool/setup.sh
-	paper_env/bin/python tool/build_tables.py logs/summary.csv tool/table_template.tex figures/benchmarks_table.tex
+	paper_env/bin/python tool/build_tables.py $@
+
+logs/logbench*:;
 
 logs/summary.csv: logs/logbench* tool/difflogs.py
-	python tool/difflogs.py --diffall logs
+	@if ls logs/logbench* > /dev/null 2>&1; then python tool/difflogs.py --diffall logs; fi
+
+logs/backend_summary.csv: logs/logbench* tool/backenddata.py
+	@if ls logs/logbench* > /dev/null 2>&1; then python tool/backenddata.py logs; fi
 
 logs::
 	tool/run_benchmarks.sh
+
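With the pattern rule above, both generated tables are now built by the same
recipe; build_tables.py receives only the target path ($@) and derives the
rest itself. For example:

    paper_env/bin/python tool/build_tables.py figures/backend_table.tex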
diff --git a/talk/vmil2012/logs/backend_summary.csv b/talk/vmil2012/logs/backend_summary.csv
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/logs/backend_summary.csv
@@ -0,0 +1,12 @@
+exe,bench,asm size,guard map size
+pypy-c,chaos,154,24
+pypy-c,crypto_pyaes,167,24
+pypy-c,django,220,47
+pypy-c,go,4802,874
+pypy-c,pyflate-fast,719,150
+pypy-c,raytrace-simple,486,75
+pypy-c,richards,153,17
+pypy-c,spambayes,2502,337
+pypy-c,sympy_expand,918,211
+pypy-c,telco,506,77
+pypy-c,twisted_names,1604,211
diff --git a/talk/vmil2012/logs/summary.csv b/talk/vmil2012/logs/summary.csv
--- a/talk/vmil2012/logs/summary.csv
+++ b/talk/vmil2012/logs/summary.csv
@@ -1,12 +1,12 @@
 exe,bench,number of loops,new before,new after,get before,get after,set before,set after,guard before,guard after,numeric before,numeric after,rest before,rest after
-pypy,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711
-pypy,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435
-pypy,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831
-pypy,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327
-pypy,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097
-pypy,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234
-pypy,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430
-pypy,spambayes,477,16535,2861,29399,13143,114323,17032,6620,2318,13209,5387,75324,32570
-pypy,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162
-pypy,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996
-pypy,twisted_names,260,15575,2246,28618,10050,94792,9744,7838,1792,9127,2978,78420,25893
+pypy-c,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711
+pypy-c,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435
+pypy-c,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831
+pypy-c,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327
+pypy-c,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097
+pypy-c,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234
+pypy-c,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430
+pypy-c,spambayes,472,16117,2832,28469,12885,110877,16673,6419,2280,12936,5293,73480,31978
+pypy-c,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162
+pypy-c,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996
+pypy-c,twisted_names,235,14357,2012,26042,9251,88092,8553,7125,1656,8216,2649,71912,23881
diff --git a/talk/vmil2012/paper.tex b/talk/vmil2012/paper.tex
--- a/talk/vmil2012/paper.tex
+++ b/talk/vmil2012/paper.tex
@@ -354,9 +354,9 @@
   \noindent
   \centering
   \begin{minipage}{1\columnwidth}
-    \begin{lstlisting}
-    i8 = int_eq(i6, 1)
-    guard_false(i8) [i6, i1, i0]
+    \begin{lstlisting}[mathescape]
+$b_1$ = int_eq($i_2$, 1)
+guard_false($b_1$)
     \end{lstlisting}
   \end{minipage}
   \begin{minipage}{.40\columnwidth}
@@ -455,7 +455,54 @@
 \section{Evaluation}
 \label{sec:evaluation}
 
-\include{figures/benchmarks_table}
+The following analysis is based on a selection of benchmarks taken from the
+set of benchmarks used to measure the performance of
+PyPy\footnote{http://speed.pypy.org/}. The selection is based on the following
+criteria \bivab{??}. The benchmarks were taken from the PyPy benchmarks
+repository at revision
+\texttt{ff7b35837d0f}\footnote{https://bitbucket.org/pypy/benchmarks/src/ff7b35837d0f}.
+They were run on a version of PyPy based on the
+tag~\texttt{release-1.9}, patched to collect additional data about the
+guards in the machine code
+backends\footnote{https://bitbucket.org/pypy/pypy/src/release-1.9}. All
+benchmark data was collected on a 64-bit MacBook Pro running Mac OS X
+10.7.4 \bivab{do we need more data for this kind of benchmarks} with the loop
+unrolling optimization disabled\bivab{rationale?}.
+
+Figure~\ref{fig:ops_count} shows the total number of operations that are
+recorded during tracing for each of the benchmarks and what percentage of
+these are guards. Figure~\ref{fig:ops_count} also shows the number of
+operations left after performing the different trace optimizations done by the
+trace optimizer, such as xxx. The last columns show the overall optimization
+rate and the optimization rate specific to guard operations, i.e. what
+percentage of the operations were removed during the optimization phase.
+
+\begin{figure*}
+    \include{figures/benchmarks_table}
+    \caption{Benchmark Results}
+    \label{fig:ops_count}
+\end{figure*}
+
+\bivab{should we rather count the trampolines as part of the guard data
+instead of counting them as part of the instructions?}
+
+Figure~\ref{fig:backend_data} shows the total memory consumption of the code
+and of the data generated by the machine code backend for the different
+benchmarks mentioned above. That is, the operations left after optimization
+occupy the space shown in Figure~\ref{fig:backend_data} once they have been
+compiled; the same holds for the additional data stored for the guards, used
+in case of a bailout and when attaching a bridge.
+\begin{figure*}
+    \include{figures/backend_table}
+    \caption{Total size of generated machine code and guard data}
+    \label{fig:backend_data}
+\end{figure*}
+
+Neither figure takes garbage collection into account. Pieces of machine code
+can be globally invalidated or simply become cold again; in both cases the
+generated machine code and the related data are garbage collected. The figures
+show the total number of operations that are evaluated by the JIT and the
+total amount of code and data that is generated from the optimized traces.
 
 * Evaluation
    * Measure guard memory consumption and machine code size
diff --git a/talk/vmil2012/tool/backenddata.py b/talk/vmil2012/tool/backenddata.py
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/tool/backenddata.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""
+Parse and summarize the traces produced by pypy-c-jit when PYPYLOG is set.
+Only works for logs produced with loop unrolling disabled.
+"""
+
+import csv
+import optparse
+import os
+import re
+import sys
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.tool.oparser import parse
+from pypy.rpython.lltypesystem import llmemory, lltype
+from pypy.tool import logparser
+
+
+def collect_logfiles(path):
+    if not os.path.isdir(path):
+        logs = [os.path.basename(path)]
+    else:
+        logs = os.listdir(path)
+    all = []
+    for log in logs:
+        parts = log.split(".")
+        if len(parts) != 3:
+            continue
+        l, exe, bench = parts
+        if l != "logbench":
+            continue
+        all.append((exe, bench, log))
+    all.sort()
+    return all
+
+
+def collect_guard_data(log):
+    """Calculate the total size in bytes of the locations maps for all guards
+    in a logfile"""
+    guards = logparser.extract_category(log, 'jit-backend-guard-size')
+    return sum(int(x[6:]) for x in guards if x.startswith('chars'))
+
+
+def collect_asm_size(log, guard_size=0):
+    """Calculate the size of the machine code pieces of a logfile. If
+    guard_size is passed it is substracted from result under the assumption
+    that the guard location maps are encoded in the instruction stream"""
+    asm = logparser.extract_category(log, 'jit-backend-dump')
+    asmlen = 0
+    expr = re.compile(r"CODE_DUMP @\w+ \+\d+\s+(.*$)")
+    for block in asm:
+        match = expr.search(block)
+        assert match is not None, "dump block without a code line"
+        code = match.group(1)
+        asmlen += len(code)
+    return asmlen - guard_size
+
+
+def collect_data(dirname, logs):
+    for exe, name, log in logs:
+        path = os.path.join(dirname, log)
+        logfile = logparser.parse_log_file(path)
+        guard_size = collect_guard_data(logfile)
+        asm_size = collect_asm_size(logfile, guard_size)
+        yield (exe, name, log, asm_size, guard_size)
+
+
+def main(path):
+    logs = collect_logfiles(path)
+    if os.path.isdir(path):
+        dirname = path
+    else:
+        dirname = os.path.dirname(path)
+    results = collect_data(dirname, logs)
+
+    with file("logs/backend_summary.csv", "w") as f:
+        csv_writer = csv.writer(f)
+        row = ["exe", "bench", "asm size", "guard map size"]
+        csv_writer.writerow(row)
+        print row
+        for exe, bench, log, asm_size, guard_size in results:
+            row = [exe, bench, asm_size / 1024, guard_size / 1024]
+            csv_writer.writerow(row)
+            print row
+
+if __name__ == '__main__':
+    parser = optparse.OptionParser(usage="%prog logdir_or_file")
+
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.print_help()
+        sys.exit(2)
+    else:
+        main(args[0])
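As an illustration of the log convention collect_guard_data relies on: the
patched backend (see ll_resume_data_count.patch below) emits one "chars N"
line per guard in the jit-backend-guard-size category, and the slice x[6:]
skips the "chars " prefix. A hypothetical smoke test:

    # each guard contributes one "chars N" line; summing N gives the total
    # size in bytes of all guard location maps
    lines = ["chars 12", "chars 3", "chars 7"]
    assert sum(int(x[6:]) for x in lines if x.startswith("chars")) == 22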
diff --git a/talk/vmil2012/tool/build_tables.py b/talk/vmil2012/tool/build_tables.py
--- a/talk/vmil2012/tool/build_tables.py
+++ b/talk/vmil2012/tool/build_tables.py
@@ -2,25 +2,29 @@
 import csv
 import django
 from django.template import Template, Context
-import optparse
-from os import path
+import os
 import sys
 
-#
+# This line is required for Django configuration
+django.conf.settings.configure()
 
 
-def main(csvfile, template, texfile):
+def getlines(csvfile):
     with open(csvfile, 'rb') as f:
         reader = csv.DictReader(f, delimiter=',')
-        lines = [l for l in reader]
+        return [l for l in reader]
+
+
+def build_ops_count_table(csvfile, texfile, template):
+    lines = getlines(csvfile)
 
     head = ['Benchmark',
             'ops b/o',
             '\\% guards b/o',
             'ops a/o',
             '\\% guards a/o',
-            'opt. rate',
-            'guard opt. rate',]
+            'opt. rate in \\%',
+            'guard opt. rate in \\%']
 
     table = []
     # collect data
@@ -33,22 +37,45 @@
         res = [
                 bench['bench'].replace('_', '\\_'),
                 ops_bo,
-                "%.2f (%s)" % (guards_bo / ops_bo * 100, bench['guard before']),
+                "%.2f" % (guards_bo / ops_bo * 100,),
                 ops_ao,
-                "%.2f (%s)" % (guards_ao / ops_ao * 100, bench['guard after']),
-                "%.2f" % ((1 - ops_ao/ops_bo) * 100,),
-                "%.2f" % ((1 - guards_ao/guards_bo) * 100,),
+                "%.2f" % (guards_ao / ops_ao * 100,),
+                "%.2f" % ((1 - ops_ao / ops_bo) * 100,),
+                "%.2f" % ((1 - guards_ao / guards_bo) * 100,),
               ]
         table.append(res)
-    output = render_table(template, head, table)
+    output = render_table(template, head, sorted(table))
+    write_table(output, texfile)
+
+
+def build_backend_count_table(csvfile, texfile, template):
+    lines = getlines(csvfile)
+
+    head = ['Benchmark',
+            'Machine code size (kB)',
+            'll resume data (kB)',
+            '\\% of machine code size']
+
+    table = []
+    # collect data
+    for bench in lines:
+        bench['bench'] = bench['bench'].replace('_', '\\_')
+        keys = ['bench', 'asm size', 'guard map size']
+        gmsize = int(bench['guard map size'])
+        asmsize = int(bench['asm size'])
+        rel = "%.2f" % (gmsize / asmsize * 100,)
+        table.append([bench[k] for k in keys] + [rel])
+    output = render_table(template, head, sorted(table))
+    write_table(output, texfile)
+
+
+def write_table(output, texfile):
     # Write the output to a file
     with open(texfile, 'w') as out_f:
         out_f.write(output)
 
 
 def render_table(ttempl, head, table):
-    # This line is required for Django configuration
-    django.conf.settings.configure()
     # open and read template
     with open(ttempl) as f:
         t = Template(f.read())
@@ -56,12 +83,25 @@
     return t.render(c)
 
 
+tables = {
+        'benchmarks_table.tex':
+            ('summary.csv', build_ops_count_table),
+        'backend_table.tex':
+            ('backend_summary.csv', build_backend_count_table)
+        }
+
+
+def main(table):
+    tablename = os.path.basename(table)
+    if tablename not in tables:
+        raise AssertionError('unsupported table')
+    data, builder = tables[tablename]
+    csvfile = os.path.join('logs', data)
+    texfile = os.path.join('figures', tablename)
+    template = os.path.join('tool', 'table_template.tex')
+    builder(csvfile, texfile, template)
+
+
 if __name__ == '__main__':
-    parser = optparse.OptionParser(usage="%prog csvfile template.tex output.tex")
-    options, args = parser.parse_args()
-    if len(args) < 3:
-        parser.print_help()
-        sys.exit(2)
-    else:
-        main(args[0], args[1], args[2])
-
+    assert len(sys.argv) > 1
+    main(sys.argv[1])
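The script no longer takes explicit csvfile/template/output arguments; the
basename of the requested target now selects both the input CSV and the
builder function. A sketch of a direct call, equivalent to what the Makefile
rule does (assuming the module is importable as build_tables):

    import build_tables
    # reads logs/backend_summary.csv, renders tool/table_template.tex,
    # and writes figures/backend_table.tex
    build_tables.main('figures/backend_table.tex')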
diff --git a/talk/vmil2012/tool/ll_resume_data_count.patch b/talk/vmil2012/tool/ll_resume_data_count.patch
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/tool/ll_resume_data_count.patch
@@ -0,0 +1,37 @@
+diff -r eec77c3e87d6 pypy/jit/backend/x86/assembler.py
+--- a/pypy/jit/backend/x86/assembler.py	Tue Jul 24 11:06:31 2012 +0200
++++ b/pypy/jit/backend/x86/assembler.py	Tue Jul 24 14:29:36 2012 +0200
+@@ -1849,6 +1849,7 @@
+     CODE_INPUTARG   = 8 | DESCR_SPECIAL
+ 
+     def write_failure_recovery_description(self, mc, failargs, locs):
++        char_count = 0
+         for i in range(len(failargs)):
+             arg = failargs[i]
+             if arg is not None:
+@@ -1865,6 +1866,7 @@
+                     pos = loc.position
+                     if pos < 0:
+                         mc.writechar(chr(self.CODE_INPUTARG))
++                        char_count += 1
+                         pos = ~pos
+                     n = self.CODE_FROMSTACK//4 + pos
+                 else:
+@@ -1873,11 +1875,17 @@
+                 n = kind + 4*n
+                 while n > 0x7F:
+                     mc.writechar(chr((n & 0x7F) | 0x80))
++                    char_count += 1
+                     n >>= 7
+             else:
+                 n = self.CODE_HOLE
+             mc.writechar(chr(n))
++            char_count += 1
+         mc.writechar(chr(self.CODE_STOP))
++        char_count += 1
++        debug_start('jit-backend-guard-size')
++        debug_print("chars %s" % char_count)
++        debug_stop('jit-backend-guard-size')
+         # assert that the fail_boxes lists are big enough
+         assert len(failargs) <= self.fail_boxes_int.SIZE
+ 
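For context, the bytes counted by this patch belong to the failure-recovery
descriptions, which store numbers in a 7-bit variable-length encoding with the
high bit marking continuation bytes. A minimal sketch of the resulting size
per number, mirroring the while loop in the patched method (the helper name
encoded_size is ours, not part of the PyPy backend):

    def encoded_size(n):
        count = 0
        while n > 0x7F:  # continuation bytes carry 7 payload bits each
            count += 1
            n >>= 7
        return count + 1  # final byte has the high bit clear

    assert encoded_size(0x05) == 1
    assert encoded_size(0x1234) == 2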
diff --git a/talk/vmil2012/tool/run_benchmarks.sh b/talk/vmil2012/tool/run_benchmarks.sh
--- a/talk/vmil2012/tool/run_benchmarks.sh
+++ b/talk/vmil2012/tool/run_benchmarks.sh
@@ -4,9 +4,32 @@
 bench_list="${base}/logs/benchs.txt"
 benchmarks="${base}/pypy-benchmarks"
 REV="ff7b35837d0f"
-pypy=$(which pypy)
+pypy_co="${base}/pypy"
+PYPYREV='release-1.9'
+pypy="${pypy_co}/pypy-c"
 pypy_opts=",--jit enable_opts=intbounds:rewrite:virtualize:string:pure:heap:ffi"
 baseline=$(which true)
+logopts='jit-backend-dump,jit-backend-guard-size,jit-log-opt,jit-log-noopt'
+# checkout and build a pypy-c version
+if [ ! -d "${pypy_co}" ]; then
+  echo "Cloning pypy repository to ${pypy_co}"
+  hg clone https://bivab@bitbucket.org/pypy/pypy "${pypy_co}"
+fi
+#
+cd "${pypy_co}"
+echo "updating pypy to fixed revision ${PYPYREV}"
+hg update "${PYPYREV}"
+echo "Patching pypy"
+patch -p1 -N < "$base/tool/ll_resume_data_count.patch"
+#
+echo "Checking for an existing pypy-c"
+if [ ! -x "${pypy}" ]
+then
+  pypy/bin/rpython -Ojit pypy/translator/goal/targetpypystandalone.py
+else
+  echo "found!"
+fi
+
 
 # setup a checkout of the pypy benchmarks and update to a fixed revision
 if [ ! -d "${benchmarks}" ]; then
@@ -16,7 +39,7 @@
   echo "updating benchmarks to fixed revision ${REV}"
   hg update "${REV}"
   echo "Patching benchmarks to pass PYPYLOG to benchmarks"
-  patch -p1 < "$base/tool/env.patch" 
+  patch -p1 < "$base/tool/env.patch"
 else
   cd "${benchmarks}"
   echo "Clone of pypy/benchmarks already present, reverting changes in the checkout"
@@ -24,13 +47,13 @@
   echo "updating benchmarks to fixed revision ${REV}"
   hg update "${REV}"
   echo "Patching benchmarks to pass PYPYLOG to benchmarks"
-  patch -p1 < "$base/tool/env.patch" 
+  patch -p1 < "$base/tool/env.patch"
 fi
 
 # run each benchmark defined on $bench_list
 while read line
 do
     logname="${base}/logs/logbench.$(basename "${pypy}").${line}"
-    export PYPYLOG="jit:$logname"
+    export PYPYLOG="${logopts}:$logname"
     bash -c "./runner.py --changed=\"${pypy}\" --args=\"${pypy_opts}\" --benchmarks=${line}"
 done < $bench_list
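Note the PYPYLOG format used above: a comma-separated list of log categories,
a colon, then the output file. With the narrowed category list the runs record
only what the tools above consume, roughly:

    PYPYLOG=jit-backend-dump,jit-backend-guard-size,jit-log-opt,jit-log-noopt:logfile pypy-c bench.py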
diff --git a/talk/vmil2012/tool/table_template.tex b/talk/vmil2012/tool/table_template.tex
--- a/talk/vmil2012/tool/table_template.tex
+++ b/talk/vmil2012/tool/table_template.tex
@@ -1,5 +1,5 @@
-\begin{table}
-    \centering
+\begin{center}
+{\smaller
     \begin{tabular}{ {%for c in head %} |l| {% endfor %} }
     \hline
     {% for col in head %}
@@ -21,6 +21,5 @@
     {% endfor %}
     \hline
     \end{tabular}
-    \caption{'fff'}
-    \label{'fff'}
-\end{table}
+}
+\end{center}

