diff --git a/rust2/benches/opts.rs b/rust2/benches/opts.rs
index b7cebd0..97e3fa2 100644
--- a/rust2/benches/opts.rs
+++ b/rust2/benches/opts.rs
@@ -23,9 +23,10 @@ impl Write for MockReadWrite {
 
 fn run_bf(bf: &str) {
     let bump = Bump::new();
-    let parsed = brainfuck::parse::parse(&bump, bf.bytes().enumerate()).unwrap();
-    let optimized = brainfuck::opts::optimize(&bump, &parsed);
-    brainfuck::ir_interpreter::run(&optimized, MockReadWrite, MockReadWrite);
+    let ast = brainfuck::parse::parse(&bump, bf.bytes().enumerate()).unwrap();
+    let ir = brainfuck::opts::optimize(&bump, &ast);
+    let code = brainfuck::codegen::generate(&bump, &ir);
+    brainfuck::codegen_interpreter::run(&code, MockReadWrite, MockReadWrite);
 }
 
 fn optimized(c: &mut Criterion) {
diff --git a/rust2/src/codegen.rs b/rust2/src/codegen.rs
index 7f6518b..da3c71d 100644
--- a/rust2/src/codegen.rs
+++ b/rust2/src/codegen.rs
@@ -5,11 +5,14 @@
 //! ```
 //! compiles down to
 //! ```text
-//! Add | Add | JmpIfZero | Out | End | Sub | JmpIfNonZero | Jmp
-//!                  |       |           ^         |          |
-//!                  +-------------------+---------|----------+
-//!                          +---------------------+
+//! Add | Add | JmpIfZero | Sub | JmpIfNonZero | Out | End
+//!                  |       ^           |         ^
+//!                  +-------|-----------|---------+
+//!                          +-----------+
+//! ```
+//!
+//! Technically, the `JmpIfNonZero` could be an unconditional `Jmp` back to the `JmpIfZero`, but
+//! that would be a needless indirection.
 
 use crate::opts::{Ir, Stmt as IrStmt, StmtKind};
 use crate::parse::Span;
@@ -26,7 +29,6 @@ pub enum Stmt {
     SetNull,
     JmpIfZero(usize),
     JmpIfNonZero(usize),
-    Jmp(usize),
     End,
 }
 
@@ -36,36 +38,28 @@ pub struct Code<'c> {
     pub debug: Vec<Span, &'c Bump>,
 }
 
-struct UnlinkedCode<'u> {
-    pub stmts: Vec<Vec<Stmt, &'u Bump>, &'u Bump>,
-    pub debug: Vec<Vec<Span, &'u Bump>, &'u Bump>,
-}
-
 pub fn generate<'c>(alloc: &'c Bump, ir: &Ir<'_>) -> Code<'c> {
-    let unlinked_alloc = Bump::new();
+    let stmts = Vec::new_in(alloc);
+    let debug = Vec::new_in(alloc);
+    let mut code = Code { stmts, debug };
 
-    let stmts = Vec::new_in(&unlinked_alloc);
-    let debug = Vec::new_in(&unlinked_alloc);
-    let mut unlinked = UnlinkedCode { stmts, debug };
+    generate_stmts(&mut code, &ir.stmts);
+    code.stmts.push(Stmt::End);
+    code.debug.push(Span::default());
 
-    generate_stmts(&unlinked_alloc, &mut unlinked, &ir.stmts);
-
-    link(alloc, &unlinked)
-}
-
-fn generate_stmts<'u>(alloc: &'u Bump, code: &mut UnlinkedCode<'u>, ir: &[IrStmt<'_>]) {
-    for ir_stmt in ir {
-        ir_to_stmt(alloc, code, ir_stmt, 0);
-    }
     assert_eq!(code.stmts.len(), code.debug.len());
+
+    code
 }
 
-fn ir_to_stmt<'u>(
-    alloc: &'u Bump,
-    code: &mut UnlinkedCode<'u>,
-    ir_stmt: &IrStmt<'_>,
-    current_block: usize,
-) {
+fn generate_stmts<'c>(code: &mut Code<'c>, ir: &[IrStmt<'_>]) {
+    for ir_stmt in ir {
+        ir_to_stmt(code, ir_stmt);
+    }
+    debug_assert_eq!(code.stmts.len(), code.debug.len());
+}
+
+fn ir_to_stmt<'c>(code: &mut Code<'c>, ir_stmt: &IrStmt<'_>) {
     let stmt = match &ir_stmt.kind {
         StmtKind::Add(n) => Stmt::Add(*n),
         StmtKind::Sub(n) => Stmt::Sub(*n),
@@ -75,20 +69,27 @@ fn ir_to_stmt<'u>(
         StmtKind::In => Stmt::In,
         StmtKind::SetNull => Stmt::SetNull,
         StmtKind::Loop(instr) => {
-            let new_block = Vec::new_in(alloc);
-            let new_block_debug = Vec::new_in(alloc);
-            code.stmts.push(new_block);
-            code.stmts.push(new_block_debug);
+            let skip_jmp_idx = code.stmts.len();
+            code.stmts.push(Stmt::JmpIfZero(usize::MAX)); // placeholder
+            code.debug.push(ir_stmt.span);
+
+            // compile the loop body now
+            generate_stmts(code, &instr.stmts);
+            // if the loop body is empty, we jmp to ourselves, which is an infinite loop - as expected
+            let first_loop_body_idx = skip_jmp_idx + 1;
+            code.stmts.push(Stmt::JmpIfNonZero(first_loop_body_idx));
+            code.debug.push(ir_stmt.span);
+
+            // there will always be at least an `End` instruction after the loop, so this index is valid
+            let after_loop_idx = code.stmts.len();
+
+            // fix the placeholder with the actual index
+            code.stmts[skip_jmp_idx] = Stmt::JmpIfZero(after_loop_idx);
 
-            let current_block = code.stmts.len() - 1;
             return;
         }
     };
 
-    code.stmts[current_block].push(stmt);
-    code.debug[current_block].push(ir_stmt.span);
-}
-
-fn link<'c>(alloc: &'c Bump, code: &UnlinkedCode<'_>) -> Code<'c> {
-    todo!()
+    code.stmts.push(stmt);
+    code.debug.push(ir_stmt.span);
 }
diff --git a/rust2/src/codegen_interpreter.rs b/rust2/src/codegen_interpreter.rs
index 3d43d4d..67c4eb9 100644
--- a/rust2/src/codegen_interpreter.rs
+++ b/rust2/src/codegen_interpreter.rs
@@ -82,9 +82,6 @@ impl<'c, W: Write, R: Read> Interpreter<'c, W, R> {
                         self.ip = pos;
                     }
                 }
-                Stmt::Jmp(pos) => {
-                    self.ip = pos;
-                }
                 Stmt::End => break,
             }
         }
diff --git a/rust2/src/lib.rs b/rust2/src/lib.rs
index 457b4fc..dd2f7f7 100644
--- a/rust2/src/lib.rs
+++ b/rust2/src/lib.rs
@@ -7,8 +7,8 @@ use bumpalo::Bump;
 use std::fmt::Display;
 use std::io::{Read, Write};
 
-mod codegen;
-mod codegen_interpreter;
+pub mod codegen;
+pub mod codegen_interpreter;
 pub mod opts;
 pub mod parse;
 
diff --git a/rust2/src/main.rs b/rust2/src/main.rs
index 0aafec6..4a25adc 100644
--- a/rust2/src/main.rs
+++ b/rust2/src/main.rs
@@ -20,7 +20,7 @@ fn main() {
     let stdin = io::stdin();
     let stdin = stdin.lock();
 
-    brainfuck::run(&file, stdout, stdin, UseProfile::Yes).unwrap_or_else(|_| {
+    brainfuck::run(&file, stdout, stdin, UseProfile::No).unwrap_or_else(|_| {
         eprintln!("error: Failed to parse brainfuck code");
         process::exit(1);
     });
diff --git a/rust2/src/parse.rs b/rust2/src/parse.rs
index c5545da..60086b4 100644
--- a/rust2/src/parse.rs
+++ b/rust2/src/parse.rs
@@ -37,7 +37,7 @@ impl Span {
     }
 }
 
-pub type Instrs<'ast> = Vec<(Instr<'ast>, Span), &'ast Bump>;
+pub type Ast<'ast> = Vec<(Instr<'ast>, Span), &'ast Bump>;
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Instr<'ast> {
@@ -47,13 +47,13 @@ pub enum Instr<'ast> {
     Left,
     Out,
     In,
-    Loop(Instrs<'ast>),
+    Loop(Ast<'ast>),
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ParseError;
 
-pub fn parse<I>(alloc: &Bump, mut src: I) -> Result<Instrs<'_>, ParseError>
+pub fn parse<I>(alloc: &Bump, mut src: I) -> Result<Ast<'_>, ParseError>
 where
     I: Iterator<Item = (usize, u8)>,
 {
@@ -85,7 +85,7 @@ fn parse_loop<'ast, I>(
     src: &mut I,
     depth: u16,
     start_idx: usize,
-) -> Result<(Instrs<'ast>, Span), ParseError>
+) -> Result<(Ast<'ast>, Span), ParseError>
 where
     I: Iterator<Item = (usize, u8)>,
 {