[pypy-commit] extradoc extradoc: commit
cfbolz
noreply at buildbot.pypy.org
Thu Jul 26 11:14:18 CEST 2012
Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: extradoc
Changeset: r4371:7706cb52355c
Date: 2012-07-26 11:13 +0200
http://bitbucket.org/pypy/extradoc/changeset/7706cb52355c/
Log: commit
diff --git a/talk/vmil2012/Makefile b/talk/vmil2012/Makefile
--- a/talk/vmil2012/Makefile
+++ b/talk/vmil2012/Makefile
@@ -1,5 +1,5 @@
-jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex
+jit-guards.pdf: paper.tex paper.bib figures/log.tex figures/example.tex figures/benchmarks_table.tex figures/backend_table.tex
pdflatex paper
bibtex paper
pdflatex paper
@@ -18,12 +18,18 @@
%.tex: %.py
pygmentize -l python -o $@ $<
-figures/benchmarks_table.tex: tool/build_tables.py logs/summary.csv tool/table_template.tex
+figures/%_table.tex: tool/build_tables.py logs/backend_summary.csv logs/summary.csv tool/table_template.tex
tool/setup.sh
- paper_env/bin/python tool/build_tables.py logs/summary.csv tool/table_template.tex figures/benchmarks_table.tex
+ paper_env/bin/python tool/build_tables.py $@
+
+logs/logbench*:;
logs/summary.csv: logs/logbench* tool/difflogs.py
- python tool/difflogs.py --diffall logs
+ @if ls logs/logbench* &> /dev/null; then python tool/difflogs.py --diffall logs; fi
+
+logs/backend_summary.csv: logs/logbench* tool/backenddata.py
+ @if ls logs/logbench* &> /dev/null; then python tool/backenddata.py logs; fi
logs::
tool/run_benchmarks.sh
+
diff --git a/talk/vmil2012/logs/backend_summary.csv b/talk/vmil2012/logs/backend_summary.csv
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/logs/backend_summary.csv
@@ -0,0 +1,12 @@
+exe,bench,asm size,guard map size
+pypy-c,chaos,154,24
+pypy-c,crypto_pyaes,167,24
+pypy-c,django,220,47
+pypy-c,go,4802,874
+pypy-c,pyflate-fast,719,150
+pypy-c,raytrace-simple,486,75
+pypy-c,richards,153,17
+pypy-c,spambayes,2502,337
+pypy-c,sympy_expand,918,211
+pypy-c,telco,506,77
+pypy-c,twisted_names,1604,211
diff --git a/talk/vmil2012/logs/summary.csv b/talk/vmil2012/logs/summary.csv
--- a/talk/vmil2012/logs/summary.csv
+++ b/talk/vmil2012/logs/summary.csv
@@ -1,12 +1,12 @@
exe,bench,number of loops,new before,new after,get before,get after,set before,set after,guard before,guard after,numeric before,numeric after,rest before,rest after
-pypy,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711
-pypy,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435
-pypy,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831
-pypy,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327
-pypy,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097
-pypy,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234
-pypy,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430
-pypy,spambayes,477,16535,2861,29399,13143,114323,17032,6620,2318,13209,5387,75324,32570
-pypy,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162
-pypy,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996
-pypy,twisted_names,260,15575,2246,28618,10050,94792,9744,7838,1792,9127,2978,78420,25893
+pypy-c,chaos,32,1810,186,1874,928,8996,684,598,242,1024,417,7603,2711
+pypy-c,crypto_pyaes,35,1385,234,1066,641,9660,873,373,110,1333,735,5976,3435
+pypy-c,django,39,1328,184,2711,1125,8251,803,884,275,623,231,7847,2831
+pypy-c,go,870,59577,4874,93474,32476,373715,22356,21449,7742,20792,7191,217142,78327
+pypy-c,pyflate-fast,147,5797,781,7654,3346,38540,2394,1977,1031,3805,1990,28135,12097
+pypy-c,raytrace-simple,115,7001,629,6283,2664,43793,2788,2078,861,2263,1353,28079,9234
+pypy-c,richards,51,1933,84,2614,1009,15947,569,634,268,700,192,10633,3430
+pypy-c,spambayes,472,16117,2832,28469,12885,110877,16673,6419,2280,12936,5293,73480,31978
+pypy-c,sympy_expand,174,6485,1067,10328,4131,36197,4078,2981,956,2493,1133,34017,11162
+pypy-c,telco,93,7289,464,9825,2244,40435,2559,2063,473,2833,964,35278,8996
+pypy-c,twisted_names,235,14357,2012,26042,9251,88092,8553,7125,1656,8216,2649,71912,23881
diff --git a/talk/vmil2012/paper.tex b/talk/vmil2012/paper.tex
--- a/talk/vmil2012/paper.tex
+++ b/talk/vmil2012/paper.tex
@@ -354,9 +354,9 @@
\noindent
\centering
\begin{minipage}{1\columnwidth}
- \begin{lstlisting}
- i8 = int_eq(i6, 1)
- guard_false(i8) [i6, i1, i0]
+ \begin{lstlisting}[mathescape]
+$b_1$ = int_eq($i_2$, 1)
+guard_false($b_1$)
\end{lstlisting}
\end{minipage}
\begin{minipage}{.40\columnwidth}
@@ -455,7 +455,54 @@
\section{Evaluation}
\label{sec:evaluation}
-\include{figures/benchmarks_table}
+The following analysis is based on a selection of benchmarks taken from the set
+of benchmarks used to measure the performance of PyPy as can be seen on the
+PyPy Speed Center\footnote{http://speed.pypy.org/}. The selection is based on the following
+criteria \bivab{??}. The benchmarks were taken from the PyPy benchmarks
+repository using revision
+\texttt{ff7b35837d0f}\footnote{https://bitbucket.org/pypy/benchmarks/src/ff7b35837d0f}.
+The benchmarks were run on a version of PyPy based on the
+tag~\texttt{release-1.9} and patched to collect additional data about the
+guards in the machine code
+backends\footnote{https://bitbucket.org/pypy/pypy/src/release-1.9}. All
+benchmark data was collected on a MacBook Pro 64 bit running Mac OS X
+10.7.4 \bivab{do we need more data for this kind of benchmarks} with the loop
+unrolling optimization disabled\bivab{rationale?}.
+
+Figure~\ref{fig:ops_count} shows the total number of operations that are
+recorded during tracing for each of the benchmarks and what percentage of these
+are guards. Figure~\ref{fig:ops_count} also shows the number of operations left
+after performing the different trace optimizations done by the trace optimizer,
+such as xxx. The last columns show the overall optimization rate and the
+optimization rate specific for guard operations, showing what percentage of the
+operations was removed during the optimizations phase.
+
+\begin{figure*}
+ \include{figures/benchmarks_table}
+ \caption{Benchmark Results}
+ \label{fig:ops_count}
+\end{figure*}
+
+\bivab{should we rather count the trampolines as part of the guard data instead
+of counting it as part of the instructions}
+
+Figure~\ref{fig:backend_data} shows
+the total memory consumption of the code and of the data generated by the machine code
+backend for the different benchmarks mentioned above. This means that the operations
+left after optimization take the space shown in Figure~\ref{fig:backend_data}
+after being compiled, plus the additional data stored for the guards to be used
+in case of a bailout and for attaching a bridge.
+\begin{figure*}
+ \include{figures/backend_table}
+ \caption{Total size of generated machine code and guard data}
+ \label{fig:backend_data}
+\end{figure*}
+
+Both figures do not take into account garbage collection. Pieces of machine
+code can be globally invalidated or just become cold again. In both cases the
+generated machine code and the related data is garbage collected. The figures
+show the total amount of operations that are evaluated by the JIT and the
+total amount of code and data that is generated from the optimized traces.
* Evaluation
* Measure guard memory consumption and machine code size
diff --git a/talk/vmil2012/tool/backenddata.py b/talk/vmil2012/tool/backenddata.py
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/tool/backenddata.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""
+Parse and summarize the traces produced by pypy-c-jit when PYPYLOG is set.
+only works for logs when unrolling is disabled
+"""
+
+import csv
+import optparse
+import os
+import re
+import sys
+from pypy.jit.metainterp.history import ConstInt
+from pypy.jit.tool.oparser import parse
+from pypy.rpython.lltypesystem import llmemory, lltype
+from pypy.tool import logparser
+
+
+def collect_logfiles(path):
+ if not os.path.isdir(path):
+ logs = [os.path.basename(path)]
+ else:
+ logs = os.listdir(path)
+ all = []
+ for log in logs:
+ parts = log.split(".")
+ if len(parts) != 3:
+ continue
+ l, exe, bench = parts
+ if l != "logbench":
+ continue
+ all.append((exe, bench, log))
+ all.sort()
+ return all
+
+
+def collect_guard_data(log):
+ """Calculate the total size in bytes of the locations maps for all guards
+ in a logfile"""
+ guards = logparser.extract_category(log, 'jit-backend-guard-size')
+ return sum(int(x[6:]) for x in guards if x.startswith('chars'))
+
+
+def collect_asm_size(log, guard_size=0):
+ """Calculate the size of the machine code pieces of a logfile. If
+    guard_size is passed it is subtracted from the result under the assumption
+    that the guard location maps are encoded in the instruction stream"""
+ asm = logparser.extract_category(log, 'jit-backend-dump')
+ asmlen = 0
+ for block in asm:
+ expr = re.compile("CODE_DUMP @\w+ \+\d+\s+(.*$)")
+ match = expr.search(block)
+ assert match is not None # no match found
+ code = match.group(1)
+ asmlen += len(code)
+ return asmlen - guard_size
+
+
+def collect_data(dirname, logs):
+ for exe, name, log in logs:
+ path = os.path.join(dirname, log)
+ logfile = logparser.parse_log_file(path)
+ guard_size = collect_guard_data(logfile)
+ asm_size = collect_asm_size(logfile, guard_size)
+ yield (exe, name, log, asm_size, guard_size)
+
+
+def main(path):
+ logs = collect_logfiles(path)
+ if os.path.isdir(path):
+ dirname = path
+ else:
+ dirname = os.path.dirname(path)
+ results = collect_data(dirname, logs)
+
+ with file("logs/backend_summary.csv", "w") as f:
+ csv_writer = csv.writer(f)
+ row = ["exe", "bench", "asm size", "guard map size"]
+ csv_writer.writerow(row)
+ print row
+ for exe, bench, log, asm_size, guard_size in results:
+ row = [exe, bench, asm_size / 1024, guard_size / 1024]
+ csv_writer.writerow(row)
+ print row
+
+if __name__ == '__main__':
+ parser = optparse.OptionParser(usage="%prog logdir_or_file")
+
+ options, args = parser.parse_args()
+ if len(args) != 1:
+ parser.print_help()
+ sys.exit(2)
+ else:
+ main(args[0])
diff --git a/talk/vmil2012/tool/build_tables.py b/talk/vmil2012/tool/build_tables.py
--- a/talk/vmil2012/tool/build_tables.py
+++ b/talk/vmil2012/tool/build_tables.py
@@ -2,25 +2,29 @@
import csv
import django
from django.template import Template, Context
-import optparse
-from os import path
+import os
import sys
-#
+# This line is required for Django configuration
+django.conf.settings.configure()
-def main(csvfile, template, texfile):
+def getlines(csvfile):
with open(csvfile, 'rb') as f:
reader = csv.DictReader(f, delimiter=',')
- lines = [l for l in reader]
+ return [l for l in reader]
+
+
+def build_ops_count_table(csvfile, texfile, template):
+ lines = getlines(csvfile)
head = ['Benchmark',
'ops b/o',
'\\% guards b/o',
'ops a/o',
'\\% guards a/o',
- 'opt. rate',
- 'guard opt. rate',]
+ 'opt. rate in \\%',
+ 'guard opt. rate in \\%']
table = []
# collect data
@@ -33,22 +37,45 @@
res = [
bench['bench'].replace('_', '\\_'),
ops_bo,
- "%.2f (%s)" % (guards_bo / ops_bo * 100, bench['guard before']),
+ "%.2f" % (guards_bo / ops_bo * 100,),
ops_ao,
- "%.2f (%s)" % (guards_ao / ops_ao * 100, bench['guard after']),
- "%.2f" % ((1 - ops_ao/ops_bo) * 100,),
- "%.2f" % ((1 - guards_ao/guards_bo) * 100,),
+ "%.2f" % (guards_ao / ops_ao * 100,),
+ "%.2f" % ((1 - ops_ao / ops_bo) * 100,),
+ "%.2f" % ((1 - guards_ao / guards_bo) * 100,),
]
table.append(res)
- output = render_table(template, head, table)
+ output = render_table(template, head, sorted(table))
+ write_table(output, texfile)
+
+
+def build_backend_count_table(csvfile, texfile, template):
+ lines = getlines(csvfile)
+
+ head = ['Benchmark',
+ 'Machine code size (kB)',
+ 'll resume data (kB)',
+ '\\% of machine code size']
+
+ table = []
+ # collect data
+ for bench in lines:
+ bench['bench'] = bench['bench'].replace('_', '\\_')
+ keys = ['bench', 'asm size', 'guard map size']
+ gmsize = int(bench['guard map size'])
+ asmsize = int(bench['asm size'])
+ rel = "%.2f" % (gmsize / asmsize * 100,)
+ table.append([bench[k] for k in keys] + [rel])
+ output = render_table(template, head, sorted(table))
+ write_table(output, texfile)
+
+
+def write_table(output, texfile):
# Write the output to a file
with open(texfile, 'w') as out_f:
out_f.write(output)
def render_table(ttempl, head, table):
- # This line is required for Django configuration
- django.conf.settings.configure()
# open and read template
with open(ttempl) as f:
t = Template(f.read())
@@ -56,12 +83,25 @@
return t.render(c)
+tables = {
+ 'benchmarks_table.tex':
+ ('summary.csv', build_ops_count_table),
+ 'backend_table.tex':
+ ('backend_summary.csv', build_backend_count_table)
+ }
+
+
+def main(table):
+ tablename = os.path.basename(table)
+ if tablename not in tables:
+ raise AssertionError('unsupported table')
+ data, builder = tables[tablename]
+ csvfile = os.path.join('logs', data)
+ texfile = os.path.join('figures', tablename)
+ template = os.path.join('tool', 'table_template.tex')
+ builder(csvfile, texfile, template)
+
+
if __name__ == '__main__':
- parser = optparse.OptionParser(usage="%prog csvfile template.tex output.tex")
- options, args = parser.parse_args()
- if len(args) < 3:
- parser.print_help()
- sys.exit(2)
- else:
- main(args[0], args[1], args[2])
-
+ assert len(sys.argv) > 1
+ main(sys.argv[1])
diff --git a/talk/vmil2012/tool/ll_resume_data_count.patch b/talk/vmil2012/tool/ll_resume_data_count.patch
new file mode 100644
--- /dev/null
+++ b/talk/vmil2012/tool/ll_resume_data_count.patch
@@ -0,0 +1,37 @@
+diff -r eec77c3e87d6 pypy/jit/backend/x86/assembler.py
+--- a/pypy/jit/backend/x86/assembler.py Tue Jul 24 11:06:31 2012 +0200
++++ b/pypy/jit/backend/x86/assembler.py Tue Jul 24 14:29:36 2012 +0200
+@@ -1849,6 +1849,7 @@
+ CODE_INPUTARG = 8 | DESCR_SPECIAL
+
+ def write_failure_recovery_description(self, mc, failargs, locs):
++ char_count = 0
+ for i in range(len(failargs)):
+ arg = failargs[i]
+ if arg is not None:
+@@ -1865,6 +1866,7 @@
+ pos = loc.position
+ if pos < 0:
+ mc.writechar(chr(self.CODE_INPUTARG))
++ char_count += 1
+ pos = ~pos
+ n = self.CODE_FROMSTACK//4 + pos
+ else:
+@@ -1873,11 +1875,17 @@
+ n = kind + 4*n
+ while n > 0x7F:
+ mc.writechar(chr((n & 0x7F) | 0x80))
++ char_count += 1
+ n >>= 7
+ else:
+ n = self.CODE_HOLE
+ mc.writechar(chr(n))
++ char_count += 1
+ mc.writechar(chr(self.CODE_STOP))
++ char_count += 1
++ debug_start('jit-backend-guard-size')
++ debug_print("chars %s" % char_count)
++ debug_stop('jit-backend-guard-size')
+ # assert that the fail_boxes lists are big enough
+ assert len(failargs) <= self.fail_boxes_int.SIZE
+
diff --git a/talk/vmil2012/tool/run_benchmarks.sh b/talk/vmil2012/tool/run_benchmarks.sh
--- a/talk/vmil2012/tool/run_benchmarks.sh
+++ b/talk/vmil2012/tool/run_benchmarks.sh
@@ -4,9 +4,32 @@
bench_list="${base}/logs/benchs.txt"
benchmarks="${base}/pypy-benchmarks"
REV="ff7b35837d0f"
-pypy=$(which pypy)
+pypy_co="${base}/pypy"
+PYPYREV='release-1.9'
+pypy="${pypy_co}/pypy-c"
pypy_opts=",--jit enable_opts=intbounds:rewrite:virtualize:string:pure:heap:ffi"
baseline=$(which true)
+logopts='jit-backend-dump,jit-backend-guard-size,jit-log-opt,jit-log-noopt'
+# checkout and build a pypy-c version
+if [ ! -d "${pypy_co}" ]; then
+ echo "Cloning pypy repository to ${pypy_co}"
+ hg clone https://bivab@bitbucket.org/pypy/pypy "${pypy_co}"
+fi
+#
+cd "${pypy_co}"
+echo "updating pypy to fixed revision ${PYPYREV}"
+hg update "${PYPYREV}"
+echo "Patching pypy"
+patch -p1 -N < "$base/tool/ll_resume_data_count.patch"
+#
+echo "Checking for an existing pypy-c"
+if [ ! -x "${pypy-c}" ]
+then
+ pypy/bin/rpython -Ojit pypy/translator/goal/targetpypystandalone.py
+else
+ echo "found!"
+fi
+
# setup a checkout of the pypy benchmarks and update to a fixed revision
if [ ! -d "${benchmarks}" ]; then
@@ -16,7 +39,7 @@
echo "updating benchmarks to fixed revision ${REV}"
hg update "${REV}"
echo "Patching benchmarks to pass PYPYLOG to benchmarks"
- patch -p1 < "$base/tool/env.patch"
+ patch -p1 < "$base/tool/env.patch"
else
cd "${benchmarks}"
echo "Clone of pypy/benchmarks already present, reverting changes in the checkout"
@@ -24,13 +47,13 @@
echo "updating benchmarks to fixed revision ${REV}"
hg update "${REV}"
echo "Patching benchmarks to pass PYPYLOG to benchmarks"
- patch -p1 < "$base/tool/env.patch"
+ patch -p1 < "$base/tool/env.patch"
fi
# run each benchmark defined on $bench_list
while read line
do
logname="${base}/logs/logbench.$(basename "${pypy}").${line}"
- export PYPYLOG="jit:$logname"
+ export PYPYLOG="${logopts}:$logname"
bash -c "./runner.py --changed=\"${pypy}\" --args=\"${pypy_opts}\" --benchmarks=${line}"
done < $bench_list
diff --git a/talk/vmil2012/tool/table_template.tex b/talk/vmil2012/tool/table_template.tex
--- a/talk/vmil2012/tool/table_template.tex
+++ b/talk/vmil2012/tool/table_template.tex
@@ -1,5 +1,5 @@
-\begin{table}
- \centering
+\begin{center}
+{\smaller
\begin{tabular}{ {%for c in head %} |l| {% endfor %} }
\hline
{% for col in head %}
@@ -21,6 +21,5 @@
{% endfor %}
\hline
\end{tabular}
- \caption{'fff'}
- \label{'fff'}
-\end{table}
+}
+\end{center}
More information about the pypy-commit
mailing list