From a5058bef51f172c9b4d9d1e7a3653ae2280e72d8 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 7 May 2023 12:13:13 +0200 Subject: [PATCH] qpath parsing --- Cargo.lock | 15 +++-- elven-forest/Cargo.toml | 1 + elven-forest/src/main.rs | 3 +- elven-forest/src/size.rs | 117 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 129 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5174ba8..2503a0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,6 +109,7 @@ dependencies = [ "clap", "elven-parser", "memmap2", + "rustc-demangle", "tabled", ] @@ -319,18 +320,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -359,6 +360,12 @@ version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustix" version = "0.36.8" diff --git a/elven-forest/Cargo.toml b/elven-forest/Cargo.toml index 8ead826..2670b38 100644 --- a/elven-forest/Cargo.toml +++ b/elven-forest/Cargo.toml @@ -10,4 +10,5 @@ anyhow = "1.0.69" clap = { version = "4.1.6", features = ["derive"] } elven-parser = { path = "../elven-parser" } memmap2 = "0.5.8" +rustc-demangle = { version = "0.1.23", features = ["std"] } tabled = "0.10.0" diff --git a/elven-forest/src/main.rs b/elven-forest/src/main.rs index 64604ab..39aa4f0 100644 --- a/elven-forest/src/main.rs +++ b/elven-forest/src/main.rs @@ -1,10 +1,9 @@ -mod size; - use std::{ fmt::Display, fs::File, path::{Path, PathBuf}, }; +mod size; use anyhow::Context; use clap::Parser; diff --git a/elven-forest/src/size.rs b/elven-forest/src/size.rs index 0943144..4c7fa7d 100644 --- a/elven-forest/src/size.rs +++ b/elven-forest/src/size.rs @@ -35,8 +35,123 @@ pub fn analyze_text_bloat(elf: ElfReader<'_>) -> Result<()> { symbol_sizes.reverse(); for (sym, size) in symbol_sizes { - println!("{size} {sym}"); + let components = + symbol_components(std::str::from_utf8(sym)?).with_context(|| sym.to_string())?; + + println!("{size} {components}"); } Ok(()) } + +fn symbol_components(sym: &str) -> Result { + let demangled = rustc_demangle::demangle(sym).to_string(); + + if demangled.starts_with('<') { + let qpath = parse_qpath(&demangled).context("invalid qpath")?; + + // qpath + return Ok(demangled); + } else { + // normal path + let components = demangled.split("::").collect::>(); + let path = components.join(";"); + return Ok(path); + } +} + +#[derive(Debug, PartialEq)] +struct QPath<'a> { + qself: &'a str, + trait_: &'a str, + pathy_bit: &'a str, +} + +fn parse_qpath(s: &str) -> Result> { + let mut chars = s.char_indices().skip(1); + let mut angle_brackets = 1u64; + + let mut result = None; + let mut as_idx = None; + + while let Some((idx, char)) = chars.next() { + match char { + '<' => angle_brackets += 1, + '>' => { + angle_brackets -= 1; + if angle_brackets == 0 { + result = Some(idx); + break; + } + } + ' ' => { + if angle_brackets == 1 && as_idx == None { + as_idx = Some(idx); + } + } + _ => {} + } + } + + let q_close_idx = result.with_context(|| { + format!("qualified symbol `{s}` does not end qualified part with > properly") + })?; + + let as_idx = + as_idx.with_context(|| format!("qualified symbol `{s}` does not contain ` as `"))?; + + let q = &s[..q_close_idx]; + let pathy_bit = &s[q_close_idx + 1..]; + let pathy_bit = pathy_bit.strip_prefix("::").with_context(|| { + format!("path after qualification does not start with `::`: `{pathy_bit}`") + })?; + + let qself = &q[1..as_idx]; + let trait_ = &q[(as_idx + " as ".len())..]; + + Ok(QPath { + qself, + trait_, + pathy_bit, + }) +} + +#[cfg(test)] +mod tests { + use crate::size::QPath; + + use super::{parse_qpath, symbol_components}; + + #[test] + fn parse_qpaths() { + assert_eq!( + parse_qpath("::fmt").unwrap(), + QPath { + qself: "std::path::Components", + trait_: "core::fmt::Debug", + pathy_bit: "fmt", + } + ); + + assert_eq!( + parse_qpath("<::fmt::DebugHelper as core::fmt::Debug>::fmt").unwrap(), + QPath { + qself: "::fmt::DebugHelper", + trait_: "core::fmt::Debug", + pathy_bit: "fmt", + } + ); + } + + #[test] + fn path_debug_helper() { + // <::fmt::DebugHelper as core::fmt::Debug>::fmt::hc586615181f69e94 + let sym = "_ZN106_$LT$$LT$std..path..Iter$u20$as$u20$core..fmt..Debug$GT$..fmt..DebugHelper$u20$as$u20$core..fmt..Debug$GT$3fmt17h4f87ac80fb33df05E"; + let components = symbol_components(sym).unwrap(); + + assert_eq!( + components, + "std;path;Iter;fmt;DebugHelper;fmt;hc586615181f69e94" + ) + } +}