[pypy-commit] extradoc extradoc: re-run with no-jit build and add scaling micro-bench
Raemi
noreply at buildbot.pypy.org
Tue May 27 18:38:57 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: extradoc
Changeset: r5276:d74b99852f5a
Date: 2014-05-27 18:40 +0200
http://bitbucket.org/pypy/extradoc/changeset/d74b99852f5a/
Log: re-run with no-jit build and add scaling micro-bench
diff --git a/talk/dls2014/paper/TODO b/talk/dls2014/paper/TODO
--- a/talk/dls2014/paper/TODO
+++ b/talk/dls2014/paper/TODO
@@ -1,3 +1,4 @@
+* discuss removal GIL ruby paper
* measure things
** memory: residual & GC-numbers
** overhead: breakdown (maybe also with multiple threads)
diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex
--- a/talk/dls2014/paper/paper.tex
+++ b/talk/dls2014/paper/paper.tex
@@ -1019,14 +1019,14 @@
reach $N$ and means we need $N\times$ the memory for the private
pages alone. In this benchmark, we see the same spikes as the memory
usage. These come directly from re-sharing the pages. The maximum page
-privatisation is around $0.5$ between major collections, thus the
-private pages are responsible for a $50\%$ increase in the required
+privatisation is around $0.9$ between major collections, thus the
+private pages are responsible for a $90\%$ increase in the required
memory. Since the spikes in the GC managed memory line actually
-show increases by $~80\%$, it means that the rest comes from actual
+show increases by $\sim 100\%$, it means that the rest comes from actual
garbage objects that were collected.
-For PyPy-STM the average memory requirement is 29~MiB and there are
-$\sim 11$ major collections during the runtime. Normal PyPy with a GIL
+For PyPy-STM the average memory requirement is 21~MiB and there are
+$\sim 10$ major collections during the runtime. Normal PyPy with a GIL
grows its memory up to just 7~MiB and does not do a single major
collection in that time. Compared to normal PyPy, we are missing a
memory optimisation to store small objects in a more compact
@@ -1034,6 +1034,8 @@
duplicate any data structures like e.g. the Nursery for each
thread. This, the missing optimisation, and the additional memory
requirements for STM explained above account for this difference.
+We expect to improve this aspect in the future; in this paper we
+want to focus first on performance.
\remi{I don't know how much sense it makes to go deeper. We will
improve this in the future, but right now this is the overall picture.}
@@ -1062,7 +1064,29 @@
\subsection{Scaling}
-maybe some simple micro benchmarks with adaptable conflict rate
+To assess how well the STM system scales on its own (without any real
+workload), we execute the following loop on 1 to 4 threads:
+\begin{lstlisting}
+def workload():
+ i = 20000000
+ while i:
+ i -= 1
+\end{lstlisting}
+
+For the results in figure \ref{fig:scaling}, we averaged
+over 5 runs and normalised the average runtimes to the
+time it took on a single thread. From this we see that there
+is additional overhead introduced by each thread ($13\%$
+for all 4 threads together).
+
+\remi{what we don't show is by how much this overhead is influenced
+by allocations}
+
+\begin{figure}[h]
+ \centering
+ \includegraphics[width=1\columnwidth]{plots/scaling.pdf}
+ \caption{Scalability of the STM system\label{fig:scaling}}
+\end{figure}
\subsection{Real-World Benchmarks\label{sec:real-world-bench}}
diff --git a/talk/dls2014/paper/plots/bench_scaling.py b/talk/dls2014/paper/plots/bench_scaling.py
new file mode 100644
--- /dev/null
+++ b/talk/dls2014/paper/plots/bench_scaling.py
@@ -0,0 +1,20 @@
+import thread
+import sys
+
+
+# Scaling micro-benchmark (Python 2, low-level `thread` module): spawn
+# argv[1] threads, each running a pure-Python countdown loop, to measure
+# per-thread STM overhead.  Timed externally, e.g.:
+#   time pypy-c --jit off bench_scaling.py [1-4]
+lock = thread.allocate_lock()
+
+def workload():
+    # Busy loop: 20M decrements, no allocation, no shared mutable state.
+    i = 20000000
+    while i:
+        i -= 1
+    # Signal completion to the main thread by releasing its lock.
+    # NOTE(review): all threads release the same lock, so only the first
+    # release matters; later releases hit an already-unlocked lock and
+    # raise thread.error in those threads -- confirm this is intended.
+    lock.release()
+
+running = range(int(sys.argv[1]))
+
+lock.acquire()
+for i in running[:]:
+    thread.start_new_thread(workload, ())
+# Blocks until the FIRST workload thread finishes, not all of them --
+# presumably acceptable since every thread does identical work; verify.
+lock.acquire()
+print "done"
+#import os; os._exit(0)
diff --git a/talk/dls2014/paper/plots/plot_richards_mem.py b/talk/dls2014/paper/plots/plot_richards_mem.py
--- a/talk/dls2014/paper/plots/plot_richards_mem.py
+++ b/talk/dls2014/paper/plots/plot_richards_mem.py
@@ -67,8 +67,8 @@
ax.set_ylim(0, 50)
ax2 = ax.twinx()
- ax.set_xlim(-0.5, 11.8)
- ax2.set_ylim(0, 1)
+ ax.set_xlim(-0.5, 9.8)
+ ax2.set_ylim(0, 1.5)
ax2.set_ylabel("Ratio = ${private~pages}\over{shared~pages}$",
color='r')
legend = plot_mems(ax, ax2)
diff --git a/talk/dls2014/paper/plots/plot_scaling.py b/talk/dls2014/paper/plots/plot_scaling.py
new file mode 100755
--- /dev/null
+++ b/talk/dls2014/paper/plots/plot_scaling.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+
+# obtained with time on
+# pypy-c --jit off bench_scaling.py [1-4]
+
+
+import matplotlib
+import os
+import sys
+# Select the backend before pyplot is imported (required by matplotlib).
+matplotlib.use('gtkagg')
+
+from matplotlib import rc
+#rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
+## for Palatino and other serif fonts use:
+rc('font',**{'family':'serif','serif':['Palatino']})
+rc('text', usetex=True)
+
+args = None
+import matplotlib.pyplot as plt
+# import pprint - slow as hell
+
+# xs: thread counts 1..4; ys: one row per thread count, each row holding
+# the 5 measured wall-clock runtimes (seconds) for that configuration.
+xs = range(1,5)
+ys = [[1.73, 1.74, 1.73, 1.73, 1.74],
+      [1.75, 1.77, 1.78, 1.75, 1.75],
+      [1.8, 1.79, 1.76, 1.76, 1.79],
+      [1.82, 2.1, 1.84, 1.9, 2.13]]
+
+
+
+def plot_mems(ax):
+    """Plot mean runtime per thread count (normalised to the 1-thread
+    mean) with std-dev error bars, plus the flat "optimal" line at 1.0.
+    Returns the legend artist so savefig can include it in the bbox."""
+    import numpy as np
+    y = []
+    yerr = []
+    opt_y = [1.0] * len(xs)
+    # All rows are normalised against the single-thread average runtime.
+    first_time = np.mean(ys[0])
+    for x, d in zip(xs, ys):
+        # NOTE(review): loop variable `x` is unused and shadowed by the
+        # lambda parameter below.
+        normalized = map(lambda x:x/first_time, d)
+        y.append(np.mean(normalized))
+        yerr.append(np.std(normalized))
+
+    print y
+    ax.errorbar(xs, y, yerr=yerr,
+                label="STM")
+    ax.plot(xs, opt_y, label="optimal")
+    return ax.legend(loc=4)  # loc=4: lower right
+
+
+def main():
+    """Build the scaling figure and write it to scaling.pdf."""
+    global fig
+
+    print "Draw..."
+    fig = plt.figure()
+
+    ax = fig.add_subplot(111)
+
+    ax.set_ylabel("Runtime normalized to 1 thread")
+    ax.set_xlabel("Threads")
+    # Fixed axis limits: y up to 1.5x the 1-thread runtime, x covers 1-4
+    # threads with some margin.
+    ax.set_ylim(0, 1.5)
+    ax.set_xlim(0, 5)
+
+    legend = plot_mems(ax)
+
+
+    #axs[0].set_ylim(0, len(x))
+    #ax.set_yticks([r+0.5 for r in range(len(logs))])
+    #ax.set_yticklabels(range(1, len(logs)+1))
+    #axs[0].set_xticks([])
+
+    # def label_format(x, pos):
+    #     return "%.2f" % (abs((x - left) * 1e-6), )
+    # major_formatter = matplotlib.ticker.FuncFormatter(label_format)
+    # axs[0].xaxis.set_major_formatter(major_formatter)
+
+    #ax.set_title("Memory Usage in Richards")
+
+    plt.draw()
+    #plt.show()
+    print "Drawn."
+
+    # Tight bounding box that still includes the legend artist.
+    file_name = "scaling.pdf"
+    plt.savefig(file_name, format='pdf',
+                bbox_extra_artists=(legend,),
+                bbox_inches='tight', pad_inches=0)
+
+
+
+if __name__ == "__main__":
+    # Parse figure/font/dpi options, push them into matplotlib's global
+    # rcParams, then render the plot.
+    import argparse
+    parser = argparse.ArgumentParser(description='Plot stm log files')
+    parser.add_argument('--figure-size', default='6x4',
+                        help='set figure size in inches: format=6x4')
+    parser.add_argument('--font-size', default='10.0',
+                        help='set font size in pts: 10.0')
+    parser.add_argument('--png-dpi', default='300',
+                        help='set dpi of png output: 300')
+
+
+    args = parser.parse_args()
+    matplotlib.rcParams.update(
+        {'figure.figsize': tuple(map(int, args.figure_size.split('x'))),
+         'font.size': float(args.font_size),
+         'savefig.dpi': int(args.png_dpi),
+         })
+
+
+    main()
diff --git a/talk/dls2014/paper/plots/richards_mem.pdf b/talk/dls2014/paper/plots/richards_mem.pdf
index c2e6b5c0924fe38ec35ab1a467cda4f6c3810450..ac151b083994c0792d0a271a1b6bd3f9cb688cfc
GIT binary patch
[cut]
diff --git a/talk/dls2014/paper/plots/scaling.pdf b/talk/dls2014/paper/plots/scaling.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3b5b43b271d3c03d3edf230a38d24389b7bfdb6c
GIT binary patch
[cut]
More information about the pypy-commit
mailing list