Add slides for final presentation.

2016-04-03 23:20:44 -06:00 · 2016-04-03 23:20:44 -06:00 · 9e3e2decba
commit 9e3e2decba
parent e72d8f8dc6
4 changed files with 457 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,5 @@
 /target
 /doc
+/final-presentation/out
 *.dot
 *.mir
-*.png
--- a/final-presentation/latexmkrc
+++ b/final-presentation/latexmkrc
@ -0,0 +1,12 @@
+# vim: ft=perl
+
+$pdf_mode = 1;
+$pdflatex = 'lualatex --shell-escape %O %S';
+$out_dir = 'out';
+
+# This improves latexmk's detection of source files and generated files.
+$recorder = 1;
+
+# Ignore always-regenerated *.pyg files from the minted package when considering
+# whether to run pdflatex again.
+$hash_calc_ignore_pattern{'pyg'} = '.*';
--- a/final-presentation/rust-logo-512x512.png
+++ b/final-presentation/rust-logo-512x512.png
--- a/final-presentation/slides.tex
+++ b/final-presentation/slides.tex
@ -0,0 +1,444 @@
+\documentclass{beamer}
+\usecolortheme{beaver}
+\beamertemplatenavigationsymbolsempty
+
+% Fonts
+\usepackage{fontspec}
+\setmainfont{Source Serif Pro}[Ligatures=TeX]
+\setsansfont{Source Sans Pro}[Ligatures=TeX]
+\setmonofont{Source Code Pro}[
+  BoldFont={* Medium},
+  BoldItalicFont={* Medium Italic},
+]
+
+\usepackage[outputdir=out]{minted}
+\usepackage{tikz}
+\usetikzlibrary{positioning, fit}
+
+\tikzset{
+  invisible/.style={opacity=0,text opacity=0},
+  highlight/.style={color=red},
+  intro/.code args={<#1>}{%
+    \only<#1>{\pgfkeysalso{highlight}}
+    \alt<#1->{}{\pgfkeysalso{invisible}}
+  },
+}
+
+\title{Miri}
+\subtitle{An interpreter for Rust's mid-level intermediate representation}
+\author{
+  Scott Olson
+  \texorpdfstring{\\ \scriptsize{Supervisor: Christopher Dutchyn}}{}
+}
+\institute{
+  CMPT 400 \\
+  University of Saskatchewan
+}
+\date{}
+\titlegraphic{
+  \includegraphics[width=64px,height=64px]{rust-logo-512x512.png} \\
+  \scriptsize{\url{https://www.rust-lang.org}}
+}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Intro slides
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{document}
+
+\maketitle
+
+\begin{frame}[fragile]
+  \frametitle{What is Rust? \small{[review]}}
+
+  According to the website\dots
+
+  \begin{quote}
+    \textbf{Rust} is a systems programming language that runs blazingly fast,
+    prevents nearly all segfaults, and guarantees thread safety.
+  \end{quote}
+
+  It's a new programming language from Mozilla, and it looks like this:
+
+  \begin{minted}[
+    autogobble,
+    fontsize=\footnotesize,
+    mathescape,
+    xleftmargin=.3in,
+  ]{rust}
+    fn factorial(n: u64) -> u64 {
+        (1..n).fold(1, |a, b| a * b)
+    }
+
+    fn main() {
+        for x in 1..6 {
+            println!("{}", factorial(x));
+        }
+        // $\Rightarrow$ 1
+        // $\Rightarrow$ 1
+        // $\Rightarrow$ 2
+        // $\Rightarrow$ 6
+        // $\Rightarrow$ 24
+    }
+  \end{minted}
+\end{frame}
+
+\begin{frame}
+  \frametitle{How does Rust compile code? \onslide<-6>{\small{[review]}}}
+
+  \begin{center}
+    \begin{tikzpicture}[x=4cm, y=3.5cm, auto, rounded corners]
+      \tikzstyle{basic-stage}=[rectangle, draw, thick, align=center]
+      \tikzstyle{stage}=[basic-stage, font=\tiny]
+      \tikzstyle{pass}=[thick, -stealth]
+      \tikzstyle{pass-label}=[font=\footnotesize]
+
+      \node[basic-stage] (src) at (0,0) {Source\\Code};
+      \node[basic-stage] (mach) at (2,-1) {Machine\\Code};
+
+      \draw<1>[pass, out=0, in=180]
+        (src.east) to node[font=\Huge] {?} (mach.west);
+
+      \node[stage, intro=<2>] (ast) at (1,0)
+        {\normalsize{AST} \\ Abstract Syntax Tree};
+      \draw[pass, intro=<2>]
+        (src) -- node[pass-label] {Parse} (ast);
+
+      \node[stage, intro=<3>] (hir) at (2,0)
+        {\normalsize{HIR} \\ High-level Intermediate\\Representation};
+      \draw[pass, intro=<3>]
+        (ast) -- node[pass-label] {Simplify} (hir);
+
+      \node[stage, intro=<4>] (mir) at (0,-1)
+        {\normalsize{MIR} \\ Mid-level Intermediate\\Representation};
+      \path (hir.south) -- coordinate (middle) (mir.north);
+      \draw[pass, intro=<4>]
+        (hir.south) |- (middle) -| (mir.north);
+      \node[pass-label, above, intro=<4>] at (middle) {Lower};
+
+      \node[stage, intro=<5>] (llvm) at (1,-1)
+        {\normalsize{LLVM IR} \\ Low-level Intermediate\\Representation};
+      \draw[pass, intro=<5>]
+        (mir) -- node[pass-label] {Translate} (llvm);
+
+      \draw<6->[pass, intro=<6>]
+        (llvm) -- node[pass-label] {Magic} (mach);
+
+      \node[stage, intro=<7>] (exec) at (1,-1.75)
+        {\normalsize{Execution}};
+      \draw[pass, intro=<7>]
+        (mach) -- node[pass-label] {CPU} (exec);
+
+      \draw[pass, intro=<8>]
+        (mir) -- node[pass-label] {Miri} (exec);
+    \end{tikzpicture}
+  \end{center}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Why build Miri?}
+  \begin{itemize}
+    \item For fun and learning.
+
+    \item I originally planned to use it for testing the compiler and execution
+      of unsafe code, but shifted my goals along the way. \pause
+
+    \item Now it serves as an experimental implementation of the upcoming
+      compile-time function evaluation feature in Rust. \pause
+
+      \begin{itemize}
+        \item Similar to C++14's \mintinline{cpp}{constexpr} feature.
+
+        \item You can do complicated calculations at compile time and compile
+          their \emph{results} into the executable. \pause
+
+        \item For example, you can compute a ``perfect hash function'' for a
+          statically-known map at compile-time and have guaranteed no-collision
+          lookup at runtime. \pause
+
+        \item Miri actually supports far more of Rust than C++14's
+          \mintinline{cpp}{constexpr} does of C++ --- even heap allocation and
+          unsafe code.
+      \end{itemize}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{How was it built?}
+
+  At first I wrote a naive version with a number of downsides:
+
+  \begin{itemize}
+    \item represented values in a traditional dynamic language format, where
+      every value was the same size.
+
+    \item didn't work well for aggregates (structs, enums, arrays, etc.).
+
+    \item made unsafe programming tricks that make assumptions about low-level
+      value layout essentially impossible.
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{How was it built?}
+  \begin{itemize}
+    \item Later, a Rust compiler team member proposed a ``Rust abstract
+      machine`` with specialized value layout which solved my previous problems.
+      \pause
+
+    \item His proposal was intended for a compile-time function evaluator in the
+      Rust compiler, so I effectively implemented an experimental version of
+      that. \pause
+
+    \item After this point, making Miri work well was primarily a software
+      engineering problem.
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Data layout}
+  \begin{itemize}
+    \item Memory in Miri is literally a HashMap from ``allocation IDs'' to
+      ``abstract allocations''.
+
+    \item Allocations are represented by: \pause
+      \begin{enumerate}
+        \item An array of \textbf{raw bytes} with a size based on the type of
+          the value \pause
+        \item A set of \textbf{relocations} --- pointers into other abstract
+          allocations \pause
+        \item A mask determining which bytes are \textbf{undefined}
+      \end{enumerate}
+  \end{itemize}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Miri example slides
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+  \frametitle{\texttt{square} example}
+  \begin{center}
+    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
+      // Rust
+      fn square(n: u64) -> u64 {
+          n * n
+      }
+
+      // Generated MIR
+      fn square(arg0: u64) -> u64 {
+          let var0: u64; // n           // On function entry, Miri creates
+                                        // virtual allocations for all the
+                                        // arguments, variables, and
+                                        // temporaries.
+
+          bb0: {
+              var0 = arg0;              // Copy the argument into `n`.
+              return = Mul(var0, var0); // Multiply `n` with itself.
+              goto -> bb1;              // Jump to basic block `bb1`.
+          }
+
+          bb1: {
+              return;                   // Return from the current fn.
+          }
+      }
+    \end{minted}
+  \end{center}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{\texttt{sum} example}
+  \begin{center}
+    \begin{minted}[autogobble,fontsize=\tiny]{rust}
+      // Rust
+      fn sum() -> u64 {
+          let mut sum = 0; let mut i = 0;
+          while i < 10 { sum += i; i += 1; }
+          sum
+      }
+
+      // Generated MIR
+      fn sum() -> u64 {
+          let mut var0: u64; // sum
+          let mut var1: u64; // i
+          let mut tmp0: bool;
+
+          bb0: {
+              // sum = 0; i = 0;
+              var0 = const 0u64; var1 = const 0u64; goto -> bb1;
+          }
+          bb1: {
+              // if i < 10 { goto bb2; } else { goto bb3; }
+              tmp0 = Lt(var1, const 10u64);
+              if(tmp0) -> [true: bb2, false: bb3];
+          }
+          bb2: {
+              var0 = Add(var0, var1);       // sum = sum + i;
+              var1 = Add(var1, const 1u64); // i = i + 1;
+              goto -> bb1;
+          }
+          bb3: {
+              return = var0; goto -> bb4;
+          }
+          bb4: { return; }
+      }
+    \end{minted}
+  \end{center}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Heap allocations!}
+  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
+    fn make_vec() -> Vec<u8> {
+        // Empty array with space for 4 bytes - allocated on the heap!
+        let mut vec = Vec::with_capacity(4);
+        // Initialize the first two slots.
+        vec.push(1);
+        vec.push(2);
+        vec
+    }
+
+    // For reference:
+    //   struct Vec<T> { capacity: usize, data: *mut T, length: usize }
+
+    // Resulting allocations (on 32-bit little-endian architectures):
+    //   Region A:
+    //     04 00 00 00  00 00 00 00  02 00 00 00
+    //                  └───(B)───┘
+    //
+    //   Region B:
+    //     01 02 __ __ (underscores denote undefined bytes)
+  \end{minted}
+
+  \footnotesize{Evaluating the above involves a number of compiler built-ins,
+  ``unsafe'' code blocks, and more inside the standard library,
+  but Miri handles it all.}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Unsafe code!}
+  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
+    fn out_of_bounds() -> u8 {
+        let mut vec = vec![1, 2]
+        unsafe { *vec.get_unchecked(5) }
+    }
+
+    // test.rs:3: error: pointer offset outside bounds of allocation
+    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
+    //                         ^~~~~~~~~~~~~~~~~~~~~
+
+    fn undefined_bytes() -> u8 {
+        let mut vec = Vec::with_capacity(10);
+        unsafe { *vec.get_unchecked(5) }
+    }
+
+    // test.rs:3: error: attempted to read undefined bytes
+    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
+    //                         ^~~~~~~~~~~~~~~~~~~~~
+  \end{minted}
+\end{frame}
+
+\begin{frame}
+  \frametitle{What can't Miri do?}
+  \begin{itemize}
+    \item Miri can't do all the stuff I didn't implement yet. :)
+      \begin{itemize}
+        \item non-trivial casts
+        \item function pointers
+        \item calling destructors and freeing memory
+        \item handling all constants properly (but, well, Miri might be
+          replacing the old constants system)
+      \end{itemize}
+
+    \item Miri can't do foreign function calls (e.g. calling functions defined
+      in C or C++), but there is a reasonable way it could be done with libffi.
+      \begin{itemize}
+        \item On the other hand, for constant evaluation in the compiler, you
+          want the evaluator to be deterministic and safe, so FFI calls might be
+          banned anyway.
+      \end{itemize}
+
+    \item Without quite some effort, Miri will probably never handle inline
+      assembly...
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \begin{center}
+    \LARGE{Questions?}
+  \end{center}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Extra slides
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}[fragile]
+  \frametitle{\texttt{varN} vs. \texttt{argN}}
+  \begin{center}
+    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
+      // Rust
+      type Pair = (u64, u64);
+      fn swap((a, b): Pair) -> Pair {
+          (b, a)
+      }
+
+      // Generated MIR
+      fn swap(arg0: (u64, u64)) -> (u64, u64) {
+          let var0: u64; // a
+          let var1: u64; // b
+
+          bb0: {
+              var0 = arg0.0;         // get the 1st part of the pair
+              var1 = arg0.1;         // get the 2nd part of the pair
+              return = (var0, var1); // build a new pair in the result
+              goto -> bb1;
+          }
+
+          bb1: {
+              return;
+          }
+      }
+    \end{minted}
+  \end{center}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{\texttt{factorial} example}
+  \begin{center}
+    \begin{minted}[autogobble,fontsize=\tiny]{rust}
+      // Rust
+      fn factorial(n: u64) -> u64 {
+          (1..n).fold(1, |a, b| a * b)
+      }
+
+      // Generated MIR
+      fn factorial(arg0: u64) -> u64 {
+          let var0: u64; // n
+          let mut tmp0: Range<u64>; // Miri calculates sizes for generics like Range<u64>.
+          let mut tmp1: [closure];
+
+          bb0: {
+              var0 = arg0;
+
+              // tmp0 = 1..n
+              tmp0 = Range<u64> { start: const 1u64, end: var0 };
+
+              // tmp1 = |a, b| a * b
+              tmp1 = [closure];
+
+              // This loads the MIR for the `fold` fn from the standard library.
+              // In general, MIR for any function from any library can be loaded.
+              // return tmp0.fold(1, tmp1)
+              return = Range<u64>::fold(tmp0, const 1u64, tmp1) -> bb1;
+          }
+
+          bb1: {
+              return;
+          }
+      }
+    \end{minted}
+  \end{center}
+\end{frame}
+
+\end{document}