[pypy-commit] extradoc extradoc: re-run with no-jit build and add scaling micro-bench
Raemi
noreply at buildbot.pypy.org
Tue May 27 18:38:57 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch: extradoc
Changeset: r5276:d74b99852f5a
Date: 2014-05-27 18:40 +0200
http://bitbucket.org/pypy/extradoc/changeset/d74b99852f5a/
Log: re-run with no-jit build and add scaling micro-bench
diff --git a/talk/dls2014/paper/TODO b/talk/dls2014/paper/TODO
--- a/talk/dls2014/paper/TODO
+++ b/talk/dls2014/paper/TODO
@@ -1,3 +1,4 @@
+* discuss removal GIL ruby paper
* measure things
** memory: residual & GC-numbers
** overhead: breakdown (maybe also with multiple threads)
diff --git a/talk/dls2014/paper/paper.tex b/talk/dls2014/paper/paper.tex
--- a/talk/dls2014/paper/paper.tex
+++ b/talk/dls2014/paper/paper.tex
@@ -1019,14 +1019,14 @@
reach $N$ and means we need $N\times$ the memory for the private
pages alone. In this benchmark, we see the same spikes as the memory
usage. These come directly from re-sharing the pages. The maximum page
-privatisation is around $0.5$ between major collections, thus the
-private pages are responsible for a $50\%$ increase in the required
+privatisation is around $0.9$ between major collections, thus the
+private pages are responsible for a $90\%$ increase in the required
memory. Since the spikes in the GC managed memory line actually
-show increases by $~80\%$, it means that the rest comes from actual
+show increases by $\sim 100\%$, it means that the rest comes from actual
garbage objects that were collected.
-For PyPy-STM the average memory requirement is 29~MiB and there are
-$\sim 11$ major collections during the runtime. Normal PyPy with a GIL
+For PyPy-STM the average memory requirement is 21~MiB and there are
+$\sim 10$ major collections during the runtime. Normal PyPy with a GIL
grows its memory up to just 7~MiB and does not do a single major
collection in that time. Compared to normal PyPy, we are missing a
memory optimisation to store small objects in a more compact
@@ -1034,6 +1034,8 @@
duplicate any data structures like e.g. the Nursery for each
thread. This, the missing optimisation, and the additional memory
requirements for STM explained above account for this difference.
+We expect to improve this aspect in the future; in this paper we
+want to focus first on performance.
\remi{I don't know how much sense it makes to go deeper. We will
improve this in the future, but right now this is the overall picture.}
@@ -1062,7 +1064,29 @@
\subsection{Scaling}
-maybe some simple micro benchmarks with adaptable conflict rate
+To assess how well the STM system scales on its own (without any real
+workload), we execute the following loop on 1 to 4 threads:
+\begin{lstlisting}
+def workload():
+ i = 20000000
+ while i:
+ i -= 1
+\end{lstlisting}
+
+For the results in figure \ref{fig:scaling}, we averaged
+over 5 runs and normalised the average runtimes to the
+time it took on a single thread. From this we see that there
+is additional overhead introduced by each thread ($13\%$
+for all 4 threads together).
+
+\remi{what we don't show is by how much this overhead is influenced
+by allocations}
+
+\begin{figure}[h]
+ \centering
+ \includegraphics[width=1\columnwidth]{plots/scaling.pdf}
+ \caption{Scalability of the STM system\label{fig:scaling}}
+\end{figure}
\subsection{Real-World Benchmarks\label{sec:real-world-bench}}
diff --git a/talk/dls2014/paper/plots/bench_scaling.py b/talk/dls2014/paper/plots/bench_scaling.py
new file mode 100644
--- /dev/null
+++ b/talk/dls2014/paper/plots/bench_scaling.py
@@ -0,0 +1,20 @@
+import thread
+import sys
+
+
+# Scaling micro-benchmark (Python 2, low-level `thread` module): spawn
+# argv[1] threads, each running a pure-Python countdown loop, to measure
+# per-thread STM overhead.  Timed externally, e.g.:
+#   time pypy-c --jit off bench_scaling.py [1-4]
+lock = thread.allocate_lock()
+
+def workload():
+    # Busy loop: 20M decrements, no allocation, no shared mutable state.
+    i = 20000000
+    while i:
+        i -= 1
+    # Signal completion to the main thread by releasing its lock.
+    # NOTE(review): all threads release the same lock, so only the first
+    # release matters; later releases hit an already-unlocked lock and
+    # raise thread.error in those threads -- confirm this is intended.
+    lock.release()
+
+running = range(int(sys.argv[1]))
+
+lock.acquire()
+for i in running[:]:
+    thread.start_new_thread(workload, ())
+# Blocks until the FIRST workload thread finishes, not all of them --
+# presumably acceptable since every thread does identical work; verify.
+lock.acquire()
+print "done"
+#import os; os._exit(0)
diff --git a/talk/dls2014/paper/plots/plot_richards_mem.py b/talk/dls2014/paper/plots/plot_richards_mem.py
--- a/talk/dls2014/paper/plots/plot_richards_mem.py
+++ b/talk/dls2014/paper/plots/plot_richards_mem.py
@@ -67,8 +67,8 @@
ax.set_ylim(0, 50)
ax2 = ax.twinx()
- ax.set_xlim(-0.5, 11.8)
- ax2.set_ylim(0, 1)
+ ax.set_xlim(-0.5, 9.8)
+ ax2.set_ylim(0, 1.5)
ax2.set_ylabel("Ratio = ${private~pages}\over{shared~pages}$",
color='r')
legend = plot_mems(ax, ax2)
diff --git a/talk/dls2014/paper/plots/plot_scaling.py b/talk/dls2014/paper/plots/plot_scaling.py
new file mode 100755
--- /dev/null
+++ b/talk/dls2014/paper/plots/plot_scaling.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+
+# obtained with time on
+# pypy-c --jit off bench_scaling.py [1-4]
+
+
+import matplotlib
+import os
+import sys
+# Select the backend before pyplot is imported (required by matplotlib).
+matplotlib.use('gtkagg')
+
+from matplotlib import rc
+#rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
+## for Palatino and other serif fonts use:
+rc('font',**{'family':'serif','serif':['Palatino']})
+rc('text', usetex=True)
+
+args = None
+import matplotlib.pyplot as plt
+# import pprint - slow as hell
+
+# xs: thread counts 1..4; ys: one row per thread count, each row holding
+# the 5 measured wall-clock runtimes (seconds) for that configuration.
+xs = range(1,5)
+ys = [[1.73, 1.74, 1.73, 1.73, 1.74],
+      [1.75, 1.77, 1.78, 1.75, 1.75],
+      [1.8, 1.79, 1.76, 1.76, 1.79],
+      [1.82, 2.1, 1.84, 1.9, 2.13]]
+
+
+
+def plot_mems(ax):
+    """Plot mean runtime per thread count (normalised to the 1-thread
+    mean) with std-dev error bars, plus the flat "optimal" line at 1.0.
+    Returns the legend artist so savefig can include it in the bbox."""
+    import numpy as np
+    y = []
+    yerr = []
+    opt_y = [1.0] * len(xs)
+    # All rows are normalised against the single-thread average runtime.
+    first_time = np.mean(ys[0])
+    for x, d in zip(xs, ys):
+        # NOTE(review): loop variable `x` is unused and shadowed by the
+        # lambda parameter below.
+        normalized = map(lambda x:x/first_time, d)
+        y.append(np.mean(normalized))
+        yerr.append(np.std(normalized))
+
+    print y
+    ax.errorbar(xs, y, yerr=yerr,
+                label="STM")
+    ax.plot(xs, opt_y, label="optimal")
+    return ax.legend(loc=4)  # loc=4: lower right
+
+
+def main():
+    """Build the scaling figure and write it to scaling.pdf."""
+    global fig
+
+    print "Draw..."
+    fig = plt.figure()
+
+    ax = fig.add_subplot(111)
+
+    ax.set_ylabel("Runtime normalized to 1 thread")
+    ax.set_xlabel("Threads")
+    # Fixed axis limits: y up to 1.5x the 1-thread runtime, x covers 1-4
+    # threads with some margin.
+    ax.set_ylim(0, 1.5)
+    ax.set_xlim(0, 5)
+
+    legend = plot_mems(ax)
+
+
+    #axs[0].set_ylim(0, len(x))
+    #ax.set_yticks([r+0.5 for r in range(len(logs))])
+    #ax.set_yticklabels(range(1, len(logs)+1))
+    #axs[0].set_xticks([])
+
+    # def label_format(x, pos):
+    #     return "%.2f" % (abs((x - left) * 1e-6), )
+    # major_formatter = matplotlib.ticker.FuncFormatter(label_format)
+    # axs[0].xaxis.set_major_formatter(major_formatter)
+
+    #ax.set_title("Memory Usage in Richards")
+
+    plt.draw()
+    #plt.show()
+    print "Drawn."
+
+    # Tight bounding box that still includes the legend artist.
+    file_name = "scaling.pdf"
+    plt.savefig(file_name, format='pdf',
+                bbox_extra_artists=(legend,),
+                bbox_inches='tight', pad_inches=0)
+
+
+
+if __name__ == "__main__":
+    # Parse figure/font/dpi options, push them into matplotlib's global
+    # rcParams, then render the plot.
+    import argparse
+    parser = argparse.ArgumentParser(description='Plot stm log files')
+    parser.add_argument('--figure-size', default='6x4',
+                        help='set figure size in inches: format=6x4')
+    parser.add_argument('--font-size', default='10.0',
+                        help='set font size in pts: 10.0')
+    parser.add_argument('--png-dpi', default='300',
+                        help='set dpi of png output: 300')
+
+
+    args = parser.parse_args()
+    matplotlib.rcParams.update(
+        {'figure.figsize': tuple(map(int, args.figure_size.split('x'))),
+         'font.size': float(args.font_size),
+         'savefig.dpi': int(args.png_dpi),
+         })
+
+
+    main()
diff --git a/talk/dls2014/paper/plots/richards_mem.pdf b/talk/dls2014/paper/plots/richards_mem.pdf
index c2e6b5c0924fe38ec35ab1a467cda4f6c3810450..ac151b083994c0792d0a271a1b6bd3f9cb688cfc
GIT binary patch
[cut]
diff --git a/talk/dls2014/paper/plots/scaling.pdf b/talk/dls2014/paper/plots/scaling.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3b5b43b271d3c03d3edf230a38d24389b7bfdb6c
GIT binary patch
[cut]
More information about the pypy-commit
mailing list