diff --git a/slides/2025-10-10-how-rust-compiles/index.html b/slides/2025-10-10-how-rust-compiles/index.html index 0a5f0c0..5725874 100644 --- a/slides/2025-10-10-how-rust-compiles/index.html +++ b/slides/2025-10-10-how-rust-compiles/index.html @@ -85,22 +85,12 @@

it all starts at the source

-

+          

               pub fn add(a: u8, b: u8) -> u8 {
                 a.wrapping_add(b)
               }
             
-
-

it gets processed

-

-            #[attr = MacroUse {arguments: UseAll}]
-            extern crate std;
-            #[prelude_import]
-            use std::prelude::rust_2024::*;
-            fn add(a: u8, b: u8) -> u8 { a.wrapping_add(b) }
-            
-

until it doesn't even look like Rust anymore

MIR

@@ -176,13 +166,12 @@ crate --> cgu1["Codegen-Unit 1"] crate --> cgu2["Codegen-Unit 2"] crate --> cgu3["Codegen-Unit 3"] - crate --> cgu4["Codegen-Unit 4"]

codegen units

-

+          

 fn main() {}
           
@@ -199,17 +188,31 @@ fn main() {} mainmir --> mainll - mycgu1.rcgu.o --> my_binary - std["std (and others)"] --> my_binary + mycgu1.rcgu.o --> |link| my_binary + std["std (and others)"] --> |link| my_binary
+
+ +

codegen units (but more)

-

+          

 fn main() {}
-fn foo1() {}
-fn foo2() {}
+mod foos {
+  fn foo1() {}
+  fn foo2() {}
+}
           
@@ -243,7 +246,7 @@ fn foo2() {}
         

codegen units (cross-crate)

-

+            

 fn add() {}
           

@@ -394,6 +397,7 @@ fn main() { math::add() }
             - spend N times optimizing the function
             - and there's duplicate instances!
             - `cargo-llvm-lines`
+            - `share-generics` helps for non-release builds
           
         
@@ -552,67 +556,6 @@ fn main() { math::add() } - comes in many forms
-

lto = "fat" (monolithic)

@@ -645,7 +588,7 @@ Range (min … max): 271.007 s … 275.619 s 5 runs end
-

compiles r-a 237583957% more slowly

+

easily compiles 2-4x more slowly

lto = "thin" (sharded)

@@ -665,14 +608,16 @@ Range (min … max): 271.007 s … 275.619 s 5 runs end subgraph my crate - subgraph thinltosummary[ThinLTO Summary] + subgraph thinltosummary[ThinLTO Index] end subgraph thinlto1[ThinLTO 1] addll["add (LLVM IR)"] - subll["sub (LLVM IR)"] end subgraph thinlto2[ThinLTO 2] + subll["sub (LLVM IR)"] + end + subgraph thinlto3[ThinLTO 3] mainll["main (LLVM IR)"] end @@ -686,10 +631,14 @@ Range (min … max): 271.007 s … 275.619 s 5 runs thinlto1 --> my_binary thinlto2 --> my_binary + thinlto3 --> my_binary end -

compiles r-a 70% more slowly

+

+ compiles ~1.1x-1.2x more slowly | + ThinLTO Talk +