[pypy-svn] r66090 - pypy/extradoc/talk/icooolps2009/talk
cfbolz at codespeak.net
cfbolz at codespeak.net
Thu Jul 2 16:46:11 CEST 2009
Author: cfbolz
Date: Thu Jul 2 16:46:09 2009
New Revision: 66090
Added:
pypy/extradoc/talk/icooolps2009/talk/
pypy/extradoc/talk/icooolps2009/talk/Makefile
pypy/extradoc/talk/icooolps2009/talk/beamerouterthememy.sty
pypy/extradoc/talk/icooolps2009/talk/beamerthemeWarsaw.sty
pypy/extradoc/talk/icooolps2009/talk/talk.tex
Log:
Start with the ICOOOLPS slides for the tracing talk. This is how I have given
the talk in Bad Honnef, needs updating.
Added: pypy/extradoc/talk/icooolps2009/talk/Makefile
==============================================================================
--- (empty file)
+++ pypy/extradoc/talk/icooolps2009/talk/Makefile Thu Jul 2 16:46:09 2009
@@ -0,0 +1,12 @@
+%.pdf: %.eps
+ epstopdf $<
+
+viewtalk: talk.pdf
+ evince talk.pdf &
+
+clean:
+ rm talk.pdf
+
+talk.pdf: talk.tex beamerouterthememy.sty beamerthemeWarsaw.sty interp.pdf
+ pdflatex talk
+
Added: pypy/extradoc/talk/icooolps2009/talk/beamerouterthememy.sty
==============================================================================
--- (empty file)
+++ pypy/extradoc/talk/icooolps2009/talk/beamerouterthememy.sty Thu Jul 2 16:46:09 2009
@@ -0,0 +1,39 @@
+\ProvidesPackageRCS $Header: /cvsroot/latex-beamer/latex-beamer/themes/outer/beamerouterthemesplit.sty,v 1.4 2004/10/07 22:21:16 tantau Exp $
+
+% Copyright 2003 by Till Tantau <tantau at users.sourceforge.net>
+%
+% This program can be redistributed and/or modified under the terms
+% of the GNU Public License, version 2.
+
+\mode<presentation>
+
+\setbeamercolor{section in head/foot}{parent=palette quaternary}
+\setbeamercolor{subsection in head/foot}{parent=palette primary}
+
+\setbeamercolor{author in head/foot}{parent=section in head/foot}
+\setbeamercolor{title in head/foot}{parent=subsection in head/foot}
+
+
+
+\usesectionheadtemplate
+ {\hfill\insertsectionhead}
+ {\hfill\color{fg!50!bg}\insertsectionhead}
+
+
+
+
+\defbeamertemplate*{footline}{split theme}
+{%
+ \leavevmode%
+ \hbox{\begin{beamercolorbox}[wd=.6\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}%
+ \usebeamerfont{author in head/foot}\insertshortauthor
+ \end{beamercolorbox}%
+ \begin{beamercolorbox}[wd=.4\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
+ \usebeamerfont{title in head/foot}\insertshorttitle
+ \end{beamercolorbox}}%
+ \vskip0pt%
+}
+
+
+\mode
+<all>
Added: pypy/extradoc/talk/icooolps2009/talk/beamerthemeWarsaw.sty
==============================================================================
--- (empty file)
+++ pypy/extradoc/talk/icooolps2009/talk/beamerthemeWarsaw.sty Thu Jul 2 16:46:09 2009
@@ -0,0 +1,18 @@
+\ProvidesPackageRCS $Header: /cvsroot/latex-beamer/latex-beamer/themes/theme/beamerthemeWarsaw.sty,v 1.8 2004/10/07 20:53:10 tantau Exp $
+
+% Copyright 2003 by Till Tantau <tantau at users.sourceforge.net>
+%
+% This program can be redistributed and/or modified under the terms
+% of the GNU Public License, version 2.
+
+\mode<presentation>
+
+\useinnertheme[shadow=true]{rounded}
+\useoutertheme{my}
+\usecolortheme{orchid}
+\usecolortheme{whale}
+
+\setbeamerfont{block title}{size={}}
+
+\mode
+<all>
Added: pypy/extradoc/talk/icooolps2009/talk/talk.tex
==============================================================================
--- (empty file)
+++ pypy/extradoc/talk/icooolps2009/talk/talk.tex Thu Jul 2 16:46:09 2009
@@ -0,0 +1,539 @@
+\documentclass[utf8x]{beamer}
+
+\mode<presentation>
+{
+ \usetheme{Warsaw}
+
+ %\setbeamercovered{transparent}
+}
+
+
+\usepackage[english]{babel}
+
+\usepackage[utf8x]{inputenc}
+
+\usepackage{times}
+\usepackage[T1]{fontenc}
+
+\title[PyPy's Tracing JIT Compiler]{
+ Tracing the Meta-Level: PyPy's Tracing JIT Compiler
+}
+\author[Bolz, Cuni, Fijalkowski, Rigo]
+{
+ \textcolor{green!50!black}{Carl~Friedrich~Bolz}\inst{1} \and
+ Antonio Cuni\inst{2} \and
+ Maciej Fijalkowski\inst{3} \and
+ Armin Rigo
+}
+
+\institute[Düsseldorf]
+{
+ \inst{1}
+ Softwaretechnik und Programmiersprachen\\ Heinrich-Heine-Universit\"at D\"usseldorf
+ \and%
+ \vskip-2mm
+ \inst{2}
+ University of Genova, Italy
+ \and%
+ \vskip-2mm
+ \inst{3}
+ merlinux GmbH
+}
+
+\date{ICOOOLPS 2009 XXX}
+
+
+% Delete this, if you do not want the table of contents to pop up at
+% the beginning of each subsection:
+%\AtBeginSubsection[]
+%{
+% \begin{frame}<beamer>
+% \frametitle{Outline}
+% \tableofcontents[currentsection,currentsubsection]
+% \end{frame}
+%}
+
+
+% If you wish to uncover everything in a step-wise fashion, uncomment
+% the following command:
+
+%\beamerdefaultoverlayspecification{<+->}
+
+
+\begin{document}
+
+\begin{frame}
+ \titlepage
+\end{frame}
+
+%\begin{frame}
+% \frametitle{Outline}
+% \tableofcontents
+ % You might wish to add the option [pausesections]
+%\end{frame}
+
+\begin{frame}
+ \frametitle{Motivation}
+ \begin{itemize}
+ \item writing good JIT compilers for dynamic programming languages is hard and error-prone
+ \item tracing JIT compilers are a new approach to JITs that is supposed to be easier
+ \item what happens when a tracing JIT is applied ``one level down'', i.e.\ to an interpreter
+ \item how to solve the occurring problems
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Context: The PyPy Project}
+ \begin{itemize}
+ \item a general environment for implementing dynamic languages
+ \item contains a compiler for a subset of Python (``RPython'')
+ \item interpreters for dynamic languages written in that subset
+ \item various interpreters written with PyPy: Python, Prolog, Smalltalk, Scheme, JavaScript, GameBoy emulator
+ \item can be translated to a variety of target environments: C, JVM, .NET
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tracing JIT Compilers}
+ \begin{itemize}
+ \item idea from Dynamo project: dynamic rewriting of machine code
+ \item later used for a lightweight Java JIT
+ \item seems to also work for dynamic languages
+ \item incorporated into Mozilla's JavaScript VM (``TraceMonkey'')
+ \end{itemize}
+ \pause
+ \begin{block}{Basic Assumption of a Tracing JIT}
+ \begin{itemize}
+ \item programs spend most of their time executing loops
+ \item several iterations of a loop are likely to take similar code paths
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tracing JIT Compilers}
+ \begin{itemize}
+ \item mixed-mode execution environment
+ \item at first, everything is interpreted
+ \item lightweight profiling to discover hot loops
+ \item code generation only for common paths of hot loops
+ \item when a hot loop is discovered, start to produce a trace
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Tracing}
+ \begin{itemize}
+ \item a \emph{trace} is a sequential list of operations
+ \item a trace is produced by recording every operation the interpreter executes
+ \item tracing ends when the tracer sees a position in the program it has seen before
+ \item to identify these places, the \emph{position key} is used
+ \item the position key encodes the current point of execution
+ \item a trace thus corresponds to exactly one loop
+ \item that means it ends with a jump to its beginning
+ \end{itemize}
+ \pause
+ \begin{block}{Guards}
+ \begin{itemize}
+ \item the trace is only one of the possible code paths through the loop
+ \item at places where the path \emph{could} diverge, a guard is placed
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Code Generation and Execution}
+ \begin{itemize}
+ \item being linear, the trace can easily be turned into machine code
+ \item the machine code can be immediately executed
+ \item execution stops when a guard fails
+ \item after a guard failure, go back to interpreting the program
+ \end{itemize}
+\end{frame}
+
+
+\frame[containsverbatim, plain, shrink=10]{
+ \frametitle{Example}
+ \begin{verbatim}
+def strange_sum(n):
+ result = 0
+ while n >= 0:
+ result = f(result, n)
+ n -= 1
+ return result
+
+def f(a, b):
+ if b % 46 == 41:
+ return a - b
+ else:
+ return a + b
+
+
+
+
+
+
+
+
+
+
+\end{verbatim}
+}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \frametitle{Example}
+ \begin{verbatim}
+def strange_sum(n):
+ result = 0
+ while n >= 0:
+ result = f(result, n)
+ n -= 1
+ return result
+
+def f(a, b):
+ if b % 46 == 41:
+ return a - b
+ else:
+ return a + b
+
+# loop_header(result0, n0)
+# i0 = int_mod(n0, Const(46))
+# i1 = int_eq(i0, Const(41))
+# guard_false(i1)
+# result1 = int_add(result0, n0)
+# n1 = int_sub(n0, Const(1))
+# i2 = int_ge(n1, Const(0))
+# guard_true(i2)
+# jump(result1, n1)
+\end{verbatim}
+}
+
+\begin{frame}
+ \frametitle{(Dis-)Advantages of Tracing JITs}
+ \begin{block}{Good Points of the Approach}
+ \begin{itemize}
+ \item easy and fast machine code generation: needs to support only one path
+ \item interpreter does a lot of the work
+ \item can be added to an existing interpreter unobtrusively
+ \item automatic inlining
+ \item produces very little code
+ \end{itemize}
+ \end{block}
+ \pause
+ \begin{block}{Bad Points of the Approach}
+ \begin{itemize}
+ \item unclear whether assumptions are true often enough
+ \item switching between interpretation and machine code execution takes time
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Applying a Tracing JIT to an Interpreter}
+ \begin{itemize}
+ \item Question: What happens if the program is itself a bytecode interpreter?
+ \item the (usually only) hot loop of a bytecode interpreter is the bytecode dispatch loop
+ \item Assumption violated: two iterations of the dispatch loop will usually take very different code paths
+ \end{itemize}
+ \pause
+ \begin{block}{Terminology}
+ \begin{itemize}
+ \item \emph{tracing interpreter:} the interpreter that originally runs the program and produces traces
+ \item \emph{language interpreter:} the bytecode interpreter that runs on top
+ \item \emph{user program:} the program run by the language interpreter
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \begin{verbatim}
+def interpret(bytecode, a):
+ regs = [0] * 256
+ pc = 0
+ while True:
+ opcode = ord(bytecode[pc])
+ pc += 1
+ if opcode == JUMP_IF_A:
+ target = ord(bytecode[pc])
+ pc += 1
+ if a:
+ pc = target
+ elif opcode == MOV_A_R:
+ n = ord(bytecode[pc])
+ pc += 1
+ regs[n] = a
+ elif opcode == MOV_R_A:
+ n = ord(bytecode[pc])
+ pc += 1
+ a = regs[n]
+ elif opcode == ADD_R_TO_A:
+ n = ord(bytecode[pc])
+ pc += 1
+ a += regs[n]
+ elif opcode == DECR_A:
+ a -= 1
+ elif opcode == RETURN_A:
+ return a
+ \end{verbatim}
+}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \begin{verbatim}
+def interpret(bytecode, a): |
+ regs = [0] * 256 | # Example bytecode
+ pc = 0 | # Square the accumulator:
+ while True: |
+ opcode = ord(bytecode[pc]) | MOV_A_R 0 # i = a
+ pc += 1 | MOV_A_R 1 # copy of 'a'
+ if opcode == JUMP_IF_A: |
+ target = ord(bytecode[pc]) | # 4:
+ pc += 1 | MOV_R_A 0 # i--
+ if a: | DECR_A
+ pc = target | MOV_A_R 0
+ elif opcode == MOV_A_R: |
+ n = ord(bytecode[pc]) | MOV_R_A 2 # res += a
+ pc += 1 | ADD_R_TO_A 1
+ regs[n] = a | MOV_A_R 2
+ elif opcode == MOV_R_A: |
+ n = ord(bytecode[pc]) | MOV_R_A 0 # if i!=0:
+ pc += 1 | JUMP_IF_A 4 # goto 4
+ a = regs[n] |
+ elif opcode == ADD_R_TO_A: | MOV_R_A 2 # return res
+ n = ord(bytecode[pc]) | RETURN_A
+ pc += 1 |
+ a += regs[n] |
+ elif opcode == DECR_A: |
+ a -= 1 |
+ elif opcode == RETURN_A: |
+ return a |
+ \end{verbatim}
+}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \frametitle{Trace}
+ ~\\
+ Resulting trace when tracing bytecode \texttt{DECR\_A}:
+ \begin{verbatim}
+
+loop_start(a0, regs0, bytecode0, pc0)
+opcode0 = strgetitem(bytecode0, pc0)
+pc1 = int_add(pc0, Const(1))
+guard_value(opcode0, Const(7))
+a1 = int_sub(a0, Const(1))
+jump(a1, regs0, bytecode0, pc1)
+\end{verbatim}
+}
+
+\begin{frame}
+ \frametitle{Idea for a Solution}
+ \begin{itemize}
+ \item key idea: try to trace the loops in the user program
+ \item approach: add things to the position key of the tracer
+ \item tracing interpreter needs information about the language interpreter
+ \item provided by adding \emph{hints} to the language interpreter
+ \end{itemize}
+ \pause
+ \begin{block}{Hints Give Information About:}
+ \begin{itemize}
+ \item which variables make up the program counter of the language interpreter
+ \item where the bytecode dispatch loop is
+ \item which bytecodes can correspond to backward jumps
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \frametitle{Interpreter with Hints} % listing: the backward-jump test must run before pc is overwritten
+\begin{verbatim}
+tlrjitdriver = JitDriver(['pc', 'bytecode'])
+
+def interpret(bytecode, a):
+    regs = [0] * 256
+    pc = 0
+    while True:
+        tlrjitdriver.start_dispatch_loop()
+        opcode = ord(bytecode[pc])
+        pc += 1
+        if opcode == JUMP_IF_A:
+            target = ord(bytecode[pc])
+            pc += 1
+            if a:
+                if target < pc:
+                    tlrjitdriver.backward_jump()
+                pc = target
+        elif opcode == MOV_A_R:
+            ... # rest unmodified
+\end{verbatim}
+}
+
+\frame[containsverbatim, plain, shrink=20]{
+ \frametitle{Result When Tracing \texttt{SQUARE}}
+\begin{verbatim}
+loop_start(a0, regs0, bytecode0, pc0)
+# MOV_R_A 0
+opcode0 = strgetitem(bytecode0, pc0)
+pc1 = int_add(pc0, Const(1))
+guard_value(opcode0, Const(2))
+n1 = strgetitem(bytecode0, pc1)
+pc2 = int_add(pc1, Const(1))
+a1 = call(Const(<* fn list_getitem>), regs0, n1)
+# DECR_A
+...
+# MOV_A_R 0
+...
+# MOV_R_A 2
+...
+# ADD_R_TO_A 1
+...
+# MOV_A_R 2
+...
+# MOV_R_A 0
+...
+# JUMP_IF_A 4
+opcode6 = strgetitem(bytecode0, pc13)
+pc14 = int_add(pc13, Const(1))
+guard_value(opcode6, Const(3))
+target0 = strgetitem(bytecode0, pc14)
+pc15 = int_add(pc14, Const(1))
+i1 = int_is_true(a5)
+guard_true(i1)
+jump(a5, regs0, bytecode0, target0)
+\end{verbatim}
+}
+
+\begin{frame}
+ \frametitle{What Have We Won?}
+ \begin{itemize}
+ \item trace corresponds to one loop of the user program
+ \item however, most operations concerned with manipulating bytecode and program counter
+ \item bytecode and program counter are part of the position key
+ \item thus they are constant at the beginning of the loop
+ \item therefore they can and should be constant-folded
+ \end{itemize}
+\end{frame}
+
+\frame[containsverbatim, plain, shrink=20]{
+ \frametitle{Result When Tracing \texttt{SQUARE} With Constant-Folding}
+\begin{verbatim}
+loop_start(a0, regs0)
+# MOV_R_A 0
+a1 = call(Const(<* fn list_getitem>), regs0, Const(0))
+# DECR_A
+a2 = int_sub(a1, Const(1))
+# MOV_A_R 0
+call(Const(<* fn list_setitem>), regs0, Const(0), a2)
+# MOV_R_A 2
+a3 = call(Const(<* fn list_getitem>), regs0, Const(2))
+# ADD_R_TO_A 1
+i0 = call(Const(<* fn list_getitem>), regs0, Const(1))
+a4 = int_add(a3, i0)
+# MOV_A_R 2
+call(Const(<* fn list_setitem>), regs0, Const(2), a4)
+# MOV_R_A 0
+a5 = call(Const(<* fn list_getitem>), regs0, Const(0))
+# JUMP_IF_A 4
+i1 = int_is_true(a5)
+guard_true(i1)
+jump(a5, regs0)
+\end{verbatim}
+}
+
+\begin{frame}
+ \frametitle{Results}
+ \begin{itemize}
+ \item almost only computations related to the user program remain
+ \item list of registers is only vestige of language interpreter
+ \end{itemize}
+ \pause
+ \begin{block}{Timing Results Computing Square of 10'000'000}
+ \begin{tabular}{|l|l|r|r|}
+\hline
+& &Time (ms) &speedup\\
+\hline
+1 &No JIT &442.7 $\pm$ 3.4 &1.00\\
+2 &Normal Trace Compilation &1518.7 $\pm$ 7.2 &0.29\\
+3 &Unrolling of Interp. Loop &737.6 $\pm$ 7.9 &0.60\\
+4 &JIT, Full Optimizations &156.2 $\pm$ 3.8 &2.83\\
+\hline
+\end{tabular}
+\end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Scaling to Large Interpreters?}
+ \begin{itemize}
+ \item we can apply this approach to PyPy's Python interpreter (70 KLOC)
+ \item speed-ups promising: factor of 6 faster for simple loops with arithmetic
+ \item no Python-specific bugs!
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Conclusions}
+ \begin{itemize}
+ \item some small changes to a tracing JIT make it possible to effectively apply it to bytecode interpreters
+ \item result is similar to a tracing JIT for that language
+ \item bears resemblance to partial evaluation, arrived at by different means
+ \item maybe enough to write exactly one tracing JIT?
+ \end{itemize}
+ \pause
+ \begin{block}{Outlook}
+ \begin{itemize}
+ \item better optimizations of the traces
+ \item escape analysis
+ \item optimize frame objects
+ \item speed up tracing itself
+ \item apply to other interpreters and larger programs
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Thank you! Questions?}
+ \begin{itemize}
+ \item some small changes to a tracing JIT make it possible to effectively apply it to bytecode interpreters
+ \item result is similar to a tracing JIT for that language
+ \item bears resemblance to partial evaluation, arrived at by different means
+ \item maybe enough to write exactly one tracing JIT?
+ \end{itemize}
+ \begin{block}{Outlook}
+ \begin{itemize}
+ \item better optimizations of the traces
+ \item escape analysis
+ \item optimize frame objects
+ \item speed up tracing itself
+ \item apply to other interpreters and larger programs
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Backup Slides}
+\end{frame}
+
+\frame[containsverbatim, plain, shrink=10]{
+ \frametitle{Timings for Python Interpreter}
+\begin{verbatim}
+def f(a):
+ t = (1, 2, 3)
+ i = 0
+ while i < a:
+ t = (t[1], t[2], t[0])
+ i += t[0]
+ return i
+\end{verbatim}
+\begin{block}{Timings}
+\begin{tabular}{|l|l|r|r|}
+\hline
+& &Time (s) &speedup\\
+\hline
+1 &PyPy compiled to C, no JIT &23.44 $\pm$ 0.07 &1.00\\
+2 &PyPy comp'd to C, with JIT &3.58 $\pm$ 0.05 &6.54\\
+3 &CPython 2.5.2 &4.96 $\pm$ 0.05 &4.73\\
+4 &CPython 2.5.2 + Psyco 1.6 &1.51 $\pm$ 0.05 &15.57\\\hline
+\end{tabular}
+\end{block}
+}
+
+\end{document}
More information about the Pypy-commit
mailing list