diff --git a/Cargo.toml b/Cargo.toml
index 5b32a48..a8b94f4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,8 @@ edition = "2021"
 [dependencies]
 eyre = "0.6.8"
 object = "0.31.1"
-rustc-demangle = { version = "0.1.23", features = ["std"] }
+# I may be depending on things one shall not depend on (the output format).
+rustc-demangle = { version = "=0.1.23", features = ["std"] }
 rustc-hash = "1.1.0"
 serde = { version = "1.0.164", features = ["derive"] }
 serde_json = "1.0.99"
diff --git a/src/main.rs b/src/main.rs
index 7d4c5e0..d45fc17 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,11 @@
-use eyre::{eyre, Context, ContextCompat, Result};
+mod symbols;
+
+use eyre::{eyre, Context, Result};
 use object::{Object, ObjectSection, ObjectSymbol};
-use serde::Serialize;
 use rustc_hash::FxHashMap;
+use serde::Serialize;
+
+use crate::symbols::symbol_components;
 
 #[derive(serde::Serialize)]
 struct SerGroup {
@@ -56,11 +60,12 @@ fn main() -> Result<()> {
 
     for (sym, size) in symbol_sizes {
         let mut components = symbol_components(sym).with_context(|| sym.to_string())?;
-
         if components.len() > limit {
             components.truncate(limit);
         }
 
+        eprintln!("{}", rustc_demangle::demangle(sym));
+
         add_to_group(&mut root_groups, components, size);
     }
 
@@ -134,139 +139,3 @@ fn propagate_weight(group: &mut Group) -> u64 {
     total_weight
 }
 
-fn symbol_components(sym: &str) -> Result<Vec<String>> {
-    let demangled = rustc_demangle::demangle(sym).to_string();
-
-    let components = if demangled.starts_with('<') {
-        parse_qpath(&demangled)
-            .context("invalid qpath")
-            .and_then(|qpath| qpath_components(qpath))
-            .unwrap_or_else(|_| demangled.split("::").collect::<Vec<_>>())
-    } else {
-        // normal path
-        demangled.split("::").collect::<Vec<_>>()
-    };
-
-    let components = components
-        .into_iter()
-        .map(|c| {
-            if c.contains(",") {
-                format!("\"{c}\"")
-            } else {
-                c.to_owned()
-            }
-        })
-        .collect::<Vec<_>>();
-
-    // qpath
-    return Ok(components);
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-struct QPath<'a> {
-    qself: &'a str,
-    trait_: &'a str,
-    pathy_bit: &'a str,
-}
-
-fn qpath_components(qpath: QPath<'_>) -> Result<Vec<&str>> {
-    if qpath.qself.starts_with('<') {
-        if let Ok(sub_qpath) = parse_qpath(qpath.qself) {
-            let mut sub_components = qpath_components(sub_qpath)?;
-            sub_components.extend(qpath.pathy_bit.split("::"));
-            Ok(sub_components)
-        } else {
-            Ok(qpath
-                .qself
-                .split("::")
-                .chain(qpath.pathy_bit.split("::"))
-                .collect())
-        }
-    } else {
-        Ok(qpath
-            .qself
-            .split("::")
-            .chain(qpath.pathy_bit.split("::"))
-            .collect())
-    }
-}
-
-// FIXME: Apparently the symbol `std::os::linux::process::<impl core::convert::From<std::os::linux::process::PidFd> for std::os::fd::owned::OwnedFd>::from` exists in std
-// I have no clue what to do about that.
-
-fn parse_qpath(s: &str) -> Result<QPath<'_>> {
-    let mut chars = s.char_indices().skip(1);
-    let mut angle_brackets = 1u64;
-
-    let mut result = None;
-    let mut as_idx = None;
-
-    while let Some((idx, char)) = chars.next() {
-        match char {
-            '<' => angle_brackets += 1,
-            '>' => {
-                angle_brackets -= 1;
-                if angle_brackets == 0 {
-                    result = Some(idx);
-                    break;
-                }
-            }
-            ' ' => {
-                if angle_brackets == 1 && as_idx == None {
-                    as_idx = Some(idx);
-                }
-            }
-            _ => {}
-        }
-    }
-
-    let q_close_idx = result.wrap_err_with(|| {
-        format!("qualified symbol `{s}` does not end qualified part with > properly")
-    })?;
-
-    let as_idx =
-        as_idx.wrap_err_with(|| format!("qualified symbol `{s}` does not contain ` as `"))?;
-
-    let q = &s[..q_close_idx];
-    let pathy_bit = &s[q_close_idx + 1..];
-    let pathy_bit = pathy_bit.strip_prefix("::").wrap_err_with(|| {
-        format!("path after qualification does not start with `::`: `{pathy_bit}`")
-    })?;
-
-    let qself = &q[1..as_idx];
-    let trait_ = &q[(as_idx + " as ".len())..];
-
-    Ok(QPath {
-        qself,
-        trait_,
-        pathy_bit,
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use super::QPath;
-
-    use super::parse_qpath;
-
-    #[test]
-    fn parse_qpaths() {
-        assert_eq!(
-            parse_qpath("<std::path::Components as core::fmt::Debug>::fmt").unwrap(),
-            QPath {
-                qself: "std::path::Components",
-                trait_: "core::fmt::Debug",
-                pathy_bit: "fmt",
-            }
-        );
-
-        assert_eq!(
-            parse_qpath("<<std::path::Components as core::fmt::Debug>::fmt::DebugHelper as core::fmt::Debug>::fmt").unwrap(),
-            QPath {
-                qself: "<std::path::Components as core::fmt::Debug>::fmt::DebugHelper",
-                trait_: "core::fmt::Debug",
-                pathy_bit: "fmt",
-            }
-        );
-    }
-}
diff --git a/src/symbols.rs b/src/symbols.rs
new file mode 100644
index 0000000..bfca68d
--- /dev/null
+++ b/src/symbols.rs
@@ -0,0 +1,361 @@
+//! This is really hacky code where we best-effort extract some sort of tree
+//! from symbols. It's bad.
+
+use std::{fmt::Debug, iter::Peekable, str::CharIndices};
+
+use eyre::{bail, Context, ContextCompat, Result};
+
+/// Demangles `sym` and splits the result into its path components.
+///
+/// A demangled name starting with `<` is treated as a qualified path and goes
+/// through [`parse_qpath`]; if that fails, and for every other name, the
+/// components are simply the pieces between `::`.
+pub fn symbol_components(sym: &str) -> Result<Vec<String>> {
+    let demangled = rustc_demangle::demangle(sym).to_string();
+
+    // If the symbol is a qualified path (`<T as Trait>::m`), then we need to
+    // parse it as such.
+    let components = if demangled.starts_with('<') {
+        parse_qpath(&demangled)
+            .wrap_err("invalid qpath")
+            .and_then(|qpath| qpath_components(qpath))
+            .unwrap_or_else(|_| demangled.split("::").collect::<Vec<_>>())
+    } else {
+        // Not a qualified path: the pieces between `::` are the components.
+        demangled.split("::").collect::<Vec<_>>()
+    };
+
+    Ok(components.into_iter().map(ToOwned::to_owned).collect())
+}
+
+/// A `::`-separated path, stored as its segments in order.
+#[derive(PartialEq)]
+pub struct Path<'a>(Vec<PathSegment<'a>>);
+
+/// One segment of a [`Path`] plus the generic arguments that directly follow it.
+#[derive(PartialEq)]
+pub struct PathSegment<'a> {
+    path: &'a str,
+    generic_args: Vec<Path<'a>>,
+}
+
+// Compact rendering: the segments joined by `,`.
+impl Debug for Path<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let segments = self.0.iter().map(|s| format!("{s:?}")).collect::<Vec<_>>();
+        write!(f, "{}", segments.join(","))
+    }
+}
+
+// Renders `name`, or `name[arg, arg]` when there are generic arguments.
+impl Debug for PathSegment<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if self.generic_args.is_empty() {
+            return write!(f, "{}", self.path);
+        }
+
+        let args = self.generic_args.iter().map(|p| format!("{p:?}")).collect::<Vec<_>>();
+        write!(f, "{}[{}]", self.path, args.join(", "))
+    }
+}
+
+/// Tells [`parse_path`] whether the segment just parsed ended its path.
+#[derive(PartialEq)]
+enum PathFinished {
+    Yes,
+    No,
+}
+
+/// Parses a path from `chars`, which must iterate over `path`.
+///
+/// Parsing stops at the end of the input or at a `>` that closes the generic
+/// argument list this path is part of.
+///
+/// # Errors
+///
+/// Fails when a segment starts with `<`, or when a segment that does not end
+/// the path is not followed by `:`.
+pub fn parse_path<'a>(path: &'a str, chars: &mut Peekable<CharIndices<'a>>) -> Result<Path<'a>> {
+    let mut segments = Vec::new();
+
+    while let Some((idx, c)) = chars.next() {
+        match c {
+            ':' => {
+                // Swallow the second half of a `::` separator, if present.
+                if let Some((_, ':')) = chars.peek() {
+                    chars.next();
+                }
+            }
+            '<' => {
+                // Such paths exist in real symbols (see the FIXME above
+                // `parse_qpath`), so this is an error rather than a panic.
+                bail!("path segment starting with `<` is not supported: `{path}`");
+            }
+            '>' => {
+                // generic args closing, we're done.
+                return Ok(Path(segments));
+            }
+            _ => {
+                let (segment, finished) = parse_path_segment(path, chars, idx)?;
+                segments.push(segment);
+
+                // we're done.
+                if finished == PathFinished::Yes {
+                    return Ok(Path(segments));
+                }
+
+                if !matches!(chars.next(), Some((_, ':'))) {
+                    bail!("Colon must be followed by second colon");
+                }
+            }
+        }
+    }
+
+    Ok(Path(segments))
+}
+
+/// Parses one segment of `path` whose first character, at byte offset
+/// `start_of_path`, has already been taken from `chars`.
+///
+/// The segment ends at `:`, at `>`, after a generic argument list, or at the
+/// end of the input.
+fn parse_path_segment<'a>(
+    path: &'a str,
+    chars: &mut Peekable<CharIndices<'a>>,
+    start_of_path: usize,
+) -> Result<(PathSegment<'a>, PathFinished)> {
+    let mut generic_args = Vec::new();
+
+    // TODO: Paths can start with `<`. In this case, just treat the entire
+    // thing as opaque.
+
+    while let Some((idx, c)) = chars.next() {
+        let finished = match c {
+            ':' => PathFinished::No,
+            '>' => PathFinished::Yes,
+            '<' => {
+                // The closing `>` is eaten by parse_path.
+                generic_args.push(parse_path(path, chars)?);
+                PathFinished::No
+            }
+            _ => continue,
+        };
+
+        let segment = PathSegment {
+            path: &path[start_of_path..idx],
+            generic_args,
+        };
+        return Ok((segment, finished));
+    }
+
+    Ok((
+        PathSegment {
+            path: &path[start_of_path..],
+            generic_args,
+        },
+        PathFinished::Yes,
+    ))
+}
+
+/// The three parts of a qualified path `<qself as trait_>::pathy_bit`.
+#[derive(Debug, Clone, Copy, PartialEq)]
+struct QPath<'a> {
+    qself: &'a str,
+    trait_: &'a str,
+    pathy_bit: &'a str,
+}
+
+/// Flattens `qpath` into components: the self type (recursively, when it is
+/// itself a qualified path) followed by the path after the qualification.
+/// The trait does not appear in the result.
+fn qpath_components(qpath: QPath<'_>) -> Result<Vec<&str>> {
+    if qpath.qself.starts_with('<') {
+        if let Ok(sub_qpath) = parse_qpath(qpath.qself) {
+            let mut sub_components = qpath_components(sub_qpath)?;
+            sub_components.extend(qpath.pathy_bit.split("::"));
+            return Ok(sub_components);
+        }
+    }
+
+    Ok(qpath
+        .qself
+        .split("::")
+        .chain(qpath.pathy_bit.split("::"))
+        .collect())
+}
+
+// FIXME: Apparently the symbol `std::os::linux::process::<impl core::convert::From<std::os::linux::process::PidFd> for std::os::fd::owned::OwnedFd>::from` exists in std
+// I have no clue what to do about that.
+
+/// Splits `<qself as trait_>::pathy_bit` into its three parts.
+///
+/// # Errors
+///
+/// Fails when `s` does not start with `<`, when that `<` is never closed, when
+/// there is no ` as ` directly inside the brackets, or when the closing `>` is
+/// not followed by `::`.
+fn parse_qpath(s: &str) -> Result<QPath<'_>> {
+    if !s.starts_with('<') {
+        bail!("qualified symbol `{s}` does not start with `<`");
+    }
+
+    let mut angle_brackets = 1u64;
+    let mut q_close_idx = None;
+    let mut as_idx = None;
+
+    for (idx, c) in s.char_indices().skip(1) {
+        match c {
+            '<' => angle_brackets += 1,
+            '>' => {
+                angle_brackets -= 1;
+                if angle_brackets == 0 {
+                    q_close_idx = Some(idx);
+                    break;
+                }
+            }
+            // Only a full ` as ` counts: self types such as `&mut T` contain
+            // spaces of their own.
+            ' ' if angle_brackets == 1 && as_idx.is_none() && s[idx..].starts_with(" as ") => {
+                as_idx = Some(idx);
+            }
+            _ => {}
+        }
+    }
+
+    let q_close_idx = q_close_idx.wrap_err_with(|| {
+        format!("qualified symbol `{s}` does not end qualified part with > properly")
+    })?;
+
+    let as_idx =
+        as_idx.wrap_err_with(|| format!("qualified symbol `{s}` does not contain ` as `"))?;
+
+    let pathy_bit = &s[q_close_idx + 1..];
+    let pathy_bit = pathy_bit.strip_prefix("::").wrap_err_with(|| {
+        format!("path after qualification does not start with `::`: `{pathy_bit}`")
+    })?;
+
+    Ok(QPath {
+        qself: &s[1..as_idx],
+        trait_: &s[as_idx + " as ".len()..q_close_idx],
+        pathy_bit,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::symbol_components;
+    use crate::symbols::PathSegment;
+
+    use super::Path;
+    use super::QPath;
+
+    use super::parse_qpath;
+
+    fn vec(i: impl IntoIterator<Item = impl Into<String>>) -> Vec<String> {
+        i.into_iter().map(Into::into).collect::<Vec<_>>()
+    }
+
+    fn parse_path(s: &str) -> Path<'_> {
+        super::parse_path(s, &mut s.char_indices().peekable()).unwrap()
+    }
+
+    #[test]
+    fn paths() {
+        let seg = |path| PathSegment {
+            path,
+            generic_args: Vec::new(),
+        };
+        let seg_gen = |path, generic_args| PathSegment { path, generic_args };
+        let single_path = |path| Path(vec![seg(path)]);
+
+        assert_eq!(
+            parse_path("core::panicking::panic_nounwind<T>::h078e837899a661cc"),
+            Path(vec![
+                seg("core"),
+                seg("panicking"),
+                seg_gen("panic_nounwind", vec![single_path("T")]),
+                seg("h078e837899a661cc")
+            ])
+        );
+
+        assert_eq!(
+            parse_path("core::panicking::panic_nounwind::h078e837899a661cc"),
+            Path(vec![
+                seg("core"),
+                seg("panicking"),
+                seg("panic_nounwind"),
+                seg("h078e837899a661cc")
+            ])
+        );
+    }
+
+    #[test]
+    fn path_starting_with_angle_bracket_is_an_error() {
+        let s = "<impl A for B>::f";
+        assert!(super::parse_path(s, &mut s.char_indices().peekable()).is_err());
+    }
+
+    #[test]
+    fn components() {
+        assert_eq!(
+            symbol_components("core::panicking::panic_nounwind::h078e837899a661cc").unwrap(),
+            vec(["core", "panicking", "panic_nounwind", "h078e837899a661cc"])
+        );
+        assert_eq!(
+            symbol_components("std::sync::once_lock::OnceLock::initialize::h37ee4f85094ef3f6")
+                .unwrap(),
+            vec([
+                "std",
+                "sync",
+                "once_lock",
+                "OnceLock",
+                "initialize",
+                "h37ee4f85094ef3f6"
+            ])
+        );
+        assert_eq!(
+            symbol_components("<&T as core::fmt::Debug>::fmt::h59637bc6facdc591").unwrap(),
+            vec(["&T", "fmt", "h59637bc6facdc591"])
+        );
+        // NOTE(review): the expectation follows the input as written (plain `::`
+        // split, hash included); confirm against the symbol this was meant to cover.
+        assert_eq!(
+            symbol_components("core::ptr::drop_in_place::h180b14c72fab0876").unwrap(),
+            vec(["core", "ptr", "drop_in_place", "h180b14c72fab0876"])
+        );
+    }
+
+    #[test]
+    fn parse_qpaths() {
+        assert_eq!(
+            parse_qpath("<std::path::Components as core::fmt::Debug>::fmt").unwrap(),
+            QPath {
+                qself: "std::path::Components",
+                trait_: "core::fmt::Debug",
+                pathy_bit: "fmt",
+            }
+        );
+
+        assert_eq!(
+            parse_qpath("<<std::path::Components as core::fmt::Debug>::fmt::DebugHelper as core::fmt::Debug>::fmt").unwrap(),
+            QPath {
+                qself: "<std::path::Components as core::fmt::Debug>::fmt::DebugHelper",
+                trait_: "core::fmt::Debug",
+                pathy_bit: "fmt",
+            }
+        );
+
+        // A space inside the self type must not be mistaken for ` as `.
+        assert_eq!(
+            parse_qpath("<&mut T as core::fmt::Debug>::fmt").unwrap(),
+            QPath {
+                qself: "&mut T",
+                trait_: "core::fmt::Debug",
+                pathy_bit: "fmt",
+            }
+        );
+
+        // Used to panic on an out-of-range slice.
+        assert!(parse_qpath("<a b>::c").is_err());
+    }
+}