diff --git a/Cargo.lock b/Cargo.lock index ba05446..c164970 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,7 @@ dependencies = [ "ctrlc", "genemichaels", "libloading", + "markdown", "owo-colors", "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5aead06..7dd0a43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ exclude = ["test-cases/*", "full-tests/*"] [package] name = "cargo-minimize" version = "0.1.0" -edition = "2021" +edition = "2024" categories = ["development-tools"] description = "A tool for minimizing rustc ICEs" keywords = ["minimization", "ICE", "rust-development"] @@ -24,6 +24,7 @@ anyhow = "1.0.65" clap = { version = "4.0.29", features = ["derive"] } ctrlc = "3.2.5" genemichaels = "0.1.21" +markdown = { version = "=1.0.0-alpha.14" } # pinning the version to ensure genemichaels builds. libloading = "0.8.0" owo-colors = "3.5.0" proc-macro2 = { version = "1.0.48", features = ["span-locations"] } diff --git a/LICENSE-MIT b/LICENSE-MIT index ab977cf..5e1d532 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Nilstrieb +Copyright (c) 2025 Noratrieb and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 5431238..bff7122 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # cargo-minimize -Install with `cargo install --git https://github.com/Nilstrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`. +Install with `cargo install --git https://github.com/Noratrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`. -For more info, see the [cookbook](https://github.com/Nilstrieb/cargo-minimize#cookbook). +For more info, see the [cookbook](https://github.com/Noratrieb/cargo-minimize#cookbook). ## Idea @@ -37,12 +37,18 @@ Options: Additional environment variables to pass to cargo/rustc. Example: `--env NAME=VALUE --env ANOTHER_NAME=VALUE` --project-dir The working directory where cargo/rustc are invoked in. By default, this is the current working directory + --passes + A comma-seperated list of passes that should be enabled. By default, all passes are enabled. If a pass is prefixed with `no-`, it will be disabled --script-path A path to a script that is run to check whether code reproduces. When it exits with code 0, the problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints. For lints, the `MINIMIZE_LINTS` environment variable will be set to `1`. The first line of the lint stdout or stderr can be `minimize-fmt-rustc` or `minimize-fmt-cargo` to show whether the rustc or wrapper cargo lint format and which output stream is used. Defaults to cargo and stdout --script-path-lints A path to a script that is run to get lints. The first line of stdout or stderr must be `minimize-fmt-rustc` or `minimize-fmt-cargo` to show whether the rustc or wrapper cargo lint format and which output stream is used. Defaults to cargo and stdout + --ignore-file + Do not touch the following files + --bisect-delete-imports + Remove individual use statements manually, instead of relying on rustc lints output -h, --help - Print help information + Print help ``` Note: You can safely press `Ctrl-C` when running cargo-minimize. It will rollback the current minimization attempt and give you the latest known-reproducing state. diff --git a/flake.lock b/flake.lock index 58dc2b8..3725da4 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", "owner": "numtide", "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", "type": "github" }, "original": { @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1695145219, - "narHash": "sha256-Eoe9IHbvmo5wEDeJXKFOpKUwxYJIOxKUesounVccNYk=", + "lastModified": 1744932701, + "narHash": "sha256-fusHbZCyv126cyArUwwKrLdCkgVAIaa/fQJYFlCEqiU=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "5ba549eafcf3e33405e5f66decd1a72356632b96", + "rev": "b024ced1aac25639f8ca8fdfc2f8c4fbd66c48ef", "type": "github" }, "original": { diff --git a/full-tests/nested-items.rs b/full-tests/nested-items.rs new file mode 100644 index 0000000..3f71f42 --- /dev/null +++ b/full-tests/nested-items.rs @@ -0,0 +1,8 @@ +pub mod foo { + /// ~MINIMIZE-ROOT good + pub fn good(){} + /// ~REQUIRE-DELETED bad + pub fn bad(){} +} +/// ~MINIMIZE-ROOT main +fn main(){} diff --git a/full-tests/nested-items2.rs b/full-tests/nested-items2.rs new file mode 100644 index 0000000..ed02dda --- /dev/null +++ b/full-tests/nested-items2.rs @@ -0,0 +1,24 @@ +// this should all get deleted in a single swoop *and* not panic about it +/// ~REQUIRE-DELETED l1 +mod l1 { + mod l2 { + mod l3 { + mod l4{ + mod l5 { + fn foo(){} + fn bar(){} + mod l6 { + fn x1(){} + } + fn x2(){} + } + } + mod l4_2 { + fn y(){} + } + } + } + fn x8(){} +} +/// ~MINIMIZE-ROOT main +fn main(){} diff --git a/full-tests/reexports.rs b/full-tests/reexports.rs new file mode 100644 index 0000000..e60730b --- /dev/null +++ b/full-tests/reexports.rs @@ -0,0 +1,12 @@ + +use hello::{thingy, whatever}; +mod hello{ + pub fn thingy(){} + /// ~REQUIRE-DELETED whatever + pub fn whatever(){} +} + +fn main(){ + "~MINIMIZE-ROOT let x = thingy"; + let x = thingy(); +} diff --git a/full-tests/reexports2.rs b/full-tests/reexports2.rs new file mode 100644 index 0000000..ea1b6ea --- /dev/null +++ b/full-tests/reexports2.rs @@ -0,0 +1,15 @@ +mod A { + /// ~REQUIRE-DELETED S1 + pub struct S1; + pub struct S2; +} + +mod B { + use crate::A::{self, S1}; + pub use A::S2 as thingy; +} + +fn main() { + "~MINIMIZE-ROOT let x = B::thingy"; + let x = B::thingy; +} diff --git a/src/build.rs b/src/build.rs index 054564b..87d67f8 100644 --- a/src/build.rs +++ b/src/build.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{Context, Result, bail, ensure}; use rustfix::diagnostics::Diagnostic; use serde::Deserialize; use std::{ @@ -10,7 +10,7 @@ use std::{ rc::Rc, }; -use crate::{dylib_flag::RustFunction, EnvVar, Options}; +use crate::{EnvVar, Options, dylib_flag::RustFunction}; #[derive(Debug, Clone)] pub struct Build { diff --git a/src/dylib_flag.rs b/src/dylib_flag.rs index 229ea6d..1fca586 100644 --- a/src/dylib_flag.rs +++ b/src/dylib_flag.rs @@ -25,7 +25,8 @@ impl FromStr for RustFunction { type Err = anyhow::Error; fn from_str(s: &str) -> Result { - Self::compile(s).context("compiling and loading rust function") + Self::compile(s) + .map_err(|e| anyhow::format_err!("compiling and loading rust function: {:?}", e)) } } diff --git a/src/expand.rs b/src/expand.rs index 88b08f6..bbc9b46 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -1,18 +1,18 @@ // this code is pretty neat i guess but i dont have a use for it right now #![allow(dead_code)] -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result, bail}; use cargo::{ core::{ + Workspace, compiler::{BuildContext, Unit, UnitInterner}, manifest::TargetSourcePath, - Workspace, }, ops::{self, CompileOptions}, - util::{command_prelude::CompileMode, Config}, + util::{Config, command_prelude::CompileMode}, }; use std::{collections::BTreeSet, fmt::Debug, ops::Not, path::Path, process::Command}; -use syn::{visit_mut::VisitMut, File, Item, ItemExternCrate, ItemMod, ItemUse, Visibility}; +use syn::{File, Item, ItemExternCrate, ItemMod, ItemUse, Visibility, visit_mut::VisitMut}; fn cargo_expand(cargo_dir: &TargetSourcePath) -> Result { let cargo_dir = cargo_dir diff --git a/src/lib.rs b/src/lib.rs index dc8dd21..c944f28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ extern crate tracing; use std::{ path::PathBuf, str::FromStr, - sync::{atomic::AtomicBool, Arc}, + sync::{Arc, atomic::AtomicBool}, }; mod build; @@ -15,14 +15,15 @@ mod processor; pub use build::rustup_which; -#[cfg(this_pulls_in_cargo_which_is_a_big_dep_i_dont_like_it)] +// this experimental and doesnt really work +#[cfg(any())] mod expand; use anyhow::{Context, Result}; use dylib_flag::RustFunction; -use processor::Minimizer; +use processor::{Minimizer, PassSelection}; use tracing::Level; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; +use tracing_subscriber::{EnvFilter, Registry, layer::SubscriberExt, util::SubscriberInitExt}; use crate::processor::Pass; @@ -82,8 +83,9 @@ pub struct Options { pub path: PathBuf, /// A comma-seperated list of passes that should be enabled. By default, all passes are enabled. + /// If a pass is prefixed with `no-`, it will be disabled. #[arg(long)] - pub passes: Option, + pub passes: Option, /// A path to a script that is run to check whether code reproduces. When it exits with code 0, the /// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints. @@ -105,6 +107,10 @@ pub struct Options { #[arg(skip)] pub no_delete_functions: bool, + + /// Remove individual use statements manually, instead of relying on rustc lints output + #[arg(long)] + pub bisect_delete_imports: bool, } #[derive(Debug, Clone)] @@ -142,6 +148,7 @@ pub fn minimize(options: Options, stop: Arc) -> Result<()> { minimizer.run_passes([ passes::EverybodyLoops::default().boxed(), + passes::SplitUse::default().boxed(), passes::FieldDeleter::default().boxed(), passes::Privatize::default().boxed(), ])?; @@ -186,6 +193,7 @@ impl Default for Options { script_path_lints: None, ignore_file: Vec::new(), no_delete_functions: false, + bisect_delete_imports: false, } } } diff --git a/src/main.rs b/src/main.rs index d1b466d..6b70328 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,13 +2,13 @@ extern crate tracing; use std::sync::{ - atomic::{AtomicBool, Ordering}, Arc, + atomic::{AtomicBool, Ordering}, }; use anyhow::Result; use cargo_minimize::{Cargo, Parser}; -use tracing::{error, Level}; +use tracing::{Level, error}; fn main() -> Result<()> { let Cargo::Minimize(options) = Cargo::parse(); diff --git a/src/passes/everybody_loops.rs b/src/passes/everybody_loops.rs index 897175f..5ad735a 100644 --- a/src/passes/everybody_loops.rs +++ b/src/passes/everybody_loops.rs @@ -1,7 +1,7 @@ use quote::ToTokens; use syn::{parse_quote, visit_mut::VisitMut}; -use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use crate::processor::{Pass, PassController, ProcessState, SourceFile, tracking}; struct Visitor<'a> { current_path: Vec, @@ -25,9 +25,11 @@ impl<'a> Visitor<'a> { impl VisitMut for Visitor<'_> { fn visit_block_mut(&mut self, block: &mut syn::Block) { match block.stmts.as_slice() { - [syn::Stmt::Expr(syn::Expr::Loop(syn::ExprLoop { - body: loop_body, .. - }))] if loop_body.stmts.is_empty() => {} + [ + syn::Stmt::Expr(syn::Expr::Loop(syn::ExprLoop { + body: loop_body, .. + })), + ] if loop_body.stmts.is_empty() => {} // Empty bodies are empty already, no need to loopify them. [] => {} _ if self.checker.can_process(&self.current_path) => { diff --git a/src/passes/field_deleter.rs b/src/passes/field_deleter.rs index bd22276..b3c75bc 100644 --- a/src/passes/field_deleter.rs +++ b/src/passes/field_deleter.rs @@ -1,7 +1,7 @@ use quote::ToTokens; -use syn::{visit_mut::VisitMut, Fields}; +use syn::{Fields, visit_mut::VisitMut}; -use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use crate::processor::{Pass, PassController, ProcessState, SourceFile, tracking}; struct Visitor<'a> { current_path: Vec, diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 75cc9bf..96bfa11 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -1,10 +1,11 @@ use quote::ToTokens; use syn::{ - visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2, - ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature, + Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2, ItemMod, ItemStatic, + ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, Signature, + visit_mut::VisitMut, }; -use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use crate::processor::{Pass, PassController, ProcessState, SourceFile, tracking}; struct Visitor<'a> { current_path: Vec, @@ -73,9 +74,17 @@ impl<'a> Visitor<'a> { self.current_path.pop(); should_retain } + // We would hope for the unused imports pass to catch all of these + // but sadly that's not the case + Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => { + self.current_path.push(tree.to_token_stream().to_string()); + + let should_retain = self.should_retain_item(); + + self.current_path.pop(); + should_retain + } Item::ForeignMod(_) => true, - // We hope for the unused imports to show them all. - Item::Use(_) => true, Item::Verbatim(_) => true, _ => true, } diff --git a/src/passes/mod.rs b/src/passes/mod.rs index b17bb05..31fd014 100644 --- a/src/passes/mod.rs +++ b/src/passes/mod.rs @@ -2,8 +2,9 @@ mod everybody_loops; mod field_deleter; mod item_deleter; mod privatize; +mod split_use; pub use self::{ everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter, - privatize::Privatize, + privatize::Privatize, split_use::SplitUse, }; diff --git a/src/passes/privatize.rs b/src/passes/privatize.rs index c2aa30d..754d199 100644 --- a/src/passes/privatize.rs +++ b/src/passes/privatize.rs @@ -1,7 +1,7 @@ use quote::ToTokens; -use syn::{parse_quote, visit_mut::VisitMut, Visibility}; +use syn::{Visibility, parse_quote, visit_mut::VisitMut}; -use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use crate::processor::{Pass, PassController, ProcessState, SourceFile, tracking}; struct Visitor<'a> { pub_crate: Visibility, @@ -24,12 +24,32 @@ impl<'a> Visitor<'a> { impl VisitMut for Visitor<'_> { fn visit_visibility_mut(&mut self, vis: &mut Visibility) { if let Visibility::Public(_) = vis { + self.current_path.push("{{vis}}".to_string()); if self.checker.can_process(&self.current_path) { self.process_state = ProcessState::Changed; *vis = self.pub_crate.clone(); } + self.current_path.pop(); } } + fn visit_item_mut(&mut self, item: &mut syn::Item) { + match item { + syn::Item::Use(u) => { + if let Visibility::Public(_) = u.vis { + let mut path = self.current_path.clone(); + path.push(u.to_token_stream().to_string()); + if self.checker.can_process(&path) { + self.process_state = ProcessState::Changed; + u.vis = self.pub_crate.clone(); + } + path.pop(); + } + return; // early return; do not walk the child items + } + _ => {} + } + syn::visit_mut::visit_item_mut(self, item); + } tracking!(); } diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs new file mode 100644 index 0000000..500c664 --- /dev/null +++ b/src/passes/split_use.rs @@ -0,0 +1,181 @@ +use std::ops::DerefMut; + +use crate::processor::{Pass, PassController, ProcessState, SourceFile, tracking}; +use quote::ToTokens; + +use syn::{Item, ItemUse, UseName, UsePath, UseRename, UseTree, visit_mut::VisitMut}; + +struct Visitor<'a> { + process_state: ProcessState, + current_path: Vec, + checker: &'a mut PassController, +} + +impl<'a> Visitor<'a> { + fn new(checker: &'a mut PassController) -> Self { + Self { + process_state: ProcessState::NoChange, + current_path: Vec::new(), + checker, + } + } + + // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", + // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec { + // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead + // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly + match tree { + UseTree::Path(p) => { + self.current_path.push(p.ident.to_string()); + + let out = self + .expand_use_groups(top, &p.tree) + .into_iter() + .map(|x| { + let mut new = p.clone(); + new.tree = Box::new(x); + UseTree::Path(new) + }) + .collect(); + + self.current_path.pop(); + out + } + UseTree::Group(g) => { + let new_trees = g + .items + .iter() + .map(|subtree| self.expand_use_groups(top, subtree)) + .flatten() + .collect::>(); + + self.current_path.push("{{group}}".to_string()); + let can_process = self.checker.can_process(&self.current_path); + self.current_path.pop(); + + if can_process { + self.process_state = ProcessState::Changed; + return new_trees; + } else { + // Do not expand the group. + // recreate the UseTree::Group item (but with new subtrees), and return a single-element list + let mut g = g.clone(); + g.items.clear(); + g.items.extend(new_trees); + return vec![syn::UseTree::Group(g)]; + } + } + _ => return vec![tree.clone()], + } + } + + fn visit_item_list(&mut self, items: &mut Vec) { + let mut pos = 0; // index into the `items` list + while pos < items.len() { + let item_use: ItemUse = { + match &items[pos] { + Item::Use(u) => u.clone(), + _ => { + pos += 1; // if it's not a `use`` - simply advance to the next item + continue; + } + } + }; + + let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree); + // decorate each of the UseTree with a `use` keyword (and any attributes inherited) + let new_uses = new_use_trees.into_iter().map(|x| { + let mut new = item_use.clone(); + new.tree = x; + trim_trailing_self(&mut new.tree); + syn::Item::Use(new) + }); + + let step = new_uses.len(); + // replace the old use with the new uses + items.splice(pos..pos + 1, new_uses); + pos += step; // do not process freshly inserted items + } + } +} + +// It is legal to write "use module::{self};", but not "use module::self;". +// If we do end up with the latter on our hands, convert it to "use module;" instead. +fn trim_trailing_self(use_tree: &mut UseTree) { + match use_tree { + UseTree::Path(UsePath { + tree: subtree, + ident: base_ident, + .. + }) => match subtree.deref_mut() { + UseTree::Name(UseName { ident: sub_ident }) => { + if sub_ident == "self" { + *use_tree = UseTree::Name(UseName { + ident: base_ident.clone(), + }); + return; + } + } + UseTree::Rename(syn::UseRename { + ident: sub_ident, + rename, + as_token, + }) => { + if sub_ident == "self" { + *use_tree = UseTree::Rename(UseRename { + ident: base_ident.clone(), + rename: rename.clone(), + as_token: as_token.clone(), + }); + return; + } + } + UseTree::Path(_) => trim_trailing_self(&mut *subtree), + _ => {} + }, + _ => {} + } +} + +impl VisitMut for Visitor<'_> { + fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { + self.current_path.push(item_mod.ident.to_string()); + if let Some((_, items)) = &mut item_mod.content { + self.visit_item_list(items); + } + syn::visit_mut::visit_item_mod_mut(self, item_mod); + self.current_path.pop(); + } + fn visit_file_mut(&mut self, file: &mut syn::File) { + self.visit_item_list(&mut file.items); + syn::visit_mut::visit_file_mut(self, file); + } + + tracking!(visit_item_fn_mut); + tracking!(visit_impl_item_method_mut); + tracking!(visit_item_impl_mut); + tracking!(visit_field_mut); + tracking!(visit_item_struct_mut); + tracking!(visit_item_trait_mut); +} + +#[derive(Default)] +pub struct SplitUse {} + +impl Pass for SplitUse { + fn process_file( + &mut self, + krate: &mut syn::File, + _: &SourceFile, + checker: &mut PassController, + ) -> ProcessState { + let mut visitor = Visitor::new(checker); + visitor.visit_file_mut(krate); + visitor.process_state + } + + fn name(&self) -> &'static str { + "split-use" + } +} diff --git a/src/processor/checker.rs b/src/processor/checker.rs index 748fda2..b88115e 100644 --- a/src/processor/checker.rs +++ b/src/processor/checker.rs @@ -6,6 +6,14 @@ use self::worklist::Worklist; #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] struct AstPath(Vec); +impl AstPath { + fn has_prefix(&self, other: &AstPath) -> bool { + if self.0.len() < other.0.len() { + return false; + } + std::iter::zip(self.0.iter(), other.0.iter()).all(|(a, b)| a == b) + } +} impl Borrow<[String]> for AstPath { fn borrow(&self) -> &[String] { @@ -75,6 +83,20 @@ mod worklist { pub(super) fn pop(&mut self) -> Option> { self.0.pop() } + + // remove all the worklist items that would have been covered by the + // given ones. + // I.e. if we have already deleted the entire module, there's no need + // trying to delete that module's individual items anymore + pub(super) fn prune(&mut self, things: &std::collections::BTreeSet) { + for wl in &mut self.0 { + wl.retain(|path| { + // retain only if none of the things are a prefix of this path + things.iter().all(|thing| !path.has_prefix(thing)) + }) + } + self.0.retain(|wl| !wl.is_empty()); + } } } @@ -97,8 +119,9 @@ impl PassController { committed, failed: _, current, - worklist: _, + worklist, } => { + worklist.prune(current); committed.extend(mem::take(current)); self.next_in_worklist(); @@ -110,19 +133,10 @@ impl PassController { /// The changes did not reproduce the regression. Bisect further. pub fn does_not_reproduce(&mut self) { match &mut self.state { - PassControllerState::InitialCollection { candidates } => { - // Applying them all was too much, let's bisect! - let (current, first_worklist_item) = split_owned(mem::take(candidates)); - - let mut worklist = Worklist::new(); - worklist.push(first_worklist_item); - - self.state = PassControllerState::Bisecting { - committed: BTreeSet::new(), - failed: BTreeSet::new(), - current, - worklist, - }; + PassControllerState::InitialCollection { candidates: _ } => { + unreachable!( + "we should have made no changes on initial collection, what do you mean it does not reproduce?!?" + ) } PassControllerState::Bisecting { committed, @@ -155,18 +169,36 @@ impl PassController { } } - /// The pass did not apply any changes. We're done. + /// The pass did not apply any changes. We're either done or just starting pub fn no_change(&mut self) { - match &self.state { + match &mut self.state { PassControllerState::InitialCollection { candidates } => { - assert!( - candidates.is_empty(), - "No change but received candidates. The responsible pass does not seem to track the ProcessState correctly: {candidates:?}" - ); - self.state = PassControllerState::Success; + if candidates.is_empty() { + self.state = PassControllerState::Success; + } else { + // We could just set `current=candidates; worklist=default()`, + // but we are doing a minor optimization to split overlapping items into separate tries, + // so as to reduce the number of bisection steps that yield literally no new info. + let layers = layer_candidates(mem::take(candidates)); + let mut worklist = Worklist::new(); + for layer in layers.into_iter().rev() { + // .rev() so that we add shorter paths last, and process them first + worklist.push(layer); + } + let current = worklist.pop().unwrap().into_iter().collect(); + + self.state = PassControllerState::Bisecting { + committed: BTreeSet::new(), + failed: BTreeSet::new(), + current, + worklist, + }; + } } PassControllerState::Bisecting { current, .. } => { - unreachable!("Pass said it didn't change anything in the bisection phase, nils forgot what this means: {current:?}"); + unreachable!( + "Pass said it didn't change anything in the bisection phase, nora forgot what this means: {current:?}" + ); } PassControllerState::Success { .. } => {} } @@ -184,9 +216,9 @@ impl PassController { pub fn can_process(&mut self, path: &[String]) -> bool { match &mut self.state { PassControllerState::InitialCollection { candidates } => { - // For the initial collection, we collect the candidate and apply them all. + // For the initial collection, we collect the candidate but don't apply anything candidates.push(AstPath(path.to_owned())); - true + false } PassControllerState::Bisecting { current, .. } => current.contains(path), PassControllerState::Success { .. } => { @@ -205,6 +237,7 @@ impl PassController { match worklist.pop() { Some(next) => { *current = next.into_iter().collect(); + trace!(?current, "current working set: "); } None => { self.state = PassControllerState::Success; @@ -218,11 +251,7 @@ impl PassController { pub const fn div_ceil(lhs: usize, rhs: usize) -> usize { let d = lhs / rhs; let r = lhs % rhs; - if r > 0 && rhs > 0 { - d + 1 - } else { - d - } + if r > 0 && rhs > 0 { d + 1 } else { d } } /// Splits an owned container in half. @@ -239,3 +268,33 @@ fn split_owned, A: FromIterator, B: FromItera (first_half, second_half) } + +/// Split up a list of AstPath's into several sets such that none of those sets have +/// any of the AstPath's overlap. +/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set. +/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items +/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones. +fn layer_candidates(mut candidates: Vec) -> Vec> { + candidates.sort(); // this *should* put less-granular/shorter-path items first + let mut layers: Vec> = vec![]; + for candidate in candidates { + let mut appropriate_layer_no = None; + for (no, layer) in layers.iter().enumerate() { + if !layer + .iter() + .any(|known_path| candidate.has_prefix(known_path)) + { + appropriate_layer_no = Some(no); + break; + } + } + match appropriate_layer_no { + Some(no) => layers[no].push(candidate), + None => { + let new_layer = vec![candidate]; + layers.push(new_layer); + } + } + } + layers +} diff --git a/src/processor/mod.rs b/src/processor/mod.rs index 2843829..32ae865 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -3,11 +3,11 @@ mod files; mod reaper; pub(crate) use self::files::SourceFile; -use crate::{build::Build, processor::files::Changes, Options}; -use anyhow::{bail, Context, Result}; +use crate::{Options, build::Build, processor::files::Changes}; +use anyhow::{Context, Result, bail}; use owo_colors::OwoColorize; -use std::sync::atomic::Ordering; use std::sync::Arc; +use std::sync::atomic::Ordering; use std::{collections::HashSet, ffi::OsStr, fmt::Debug, sync::atomic::AtomicBool}; pub(crate) use self::checker::PassController; @@ -19,7 +19,7 @@ pub(crate) trait Pass { /// Process a file. The state of the processor might get invalidated in the process as signaled with /// `ProcessState::FileInvalidated`. When a file is invalidated, the minimizer will call `Processor::refersh_state` - /// before calling the this function on the same file again. + /// before calling this function on the same file again. fn process_file( &mut self, krate: &mut syn::File, @@ -50,6 +50,33 @@ pub(crate) enum ProcessState { FileInvalidated, } +#[derive(Debug, Clone)] +pub enum PassSelection { + Enable(Vec), + Disable(Vec), +} +impl std::str::FromStr for PassSelection { + type Err = &'static str; + fn from_str(s: &str) -> std::result::Result { + let values = s.split(',').collect::>(); + let have_negative = values.iter().any(|v| v.starts_with("no-")); + if have_negative && !values.iter().all(|v| v.starts_with("no-")) { + return Err( + "Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)", + ); + } + let actual_values = values + .into_iter() + .map(|v| v.strip_prefix("no-").unwrap_or(v).to_string()) + .collect(); + if !have_negative { + Ok(PassSelection::Enable(actual_values)) + } else { + Ok(PassSelection::Disable(actual_values)) + } + } +} + #[derive(Debug)] pub(crate) struct Minimizer { files: Vec, @@ -59,13 +86,12 @@ pub(crate) struct Minimizer { } impl Minimizer { - fn pass_disabled(&self, name: &str) -> bool { - if let Some(passes) = &self.options.passes { - if !passes.split(",").any(|allowed| name == allowed) { - return true; - } + fn pass_enabled(&self, name: &str) -> bool { + match &self.options.passes { + None => true, + Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed), + Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden), } - false } pub(crate) fn new_glob_dir( @@ -131,7 +157,7 @@ impl Minimizer { inital_build.require_reproduction("Initial")?; for mut pass in passes { - if self.pass_disabled(pass.name()) { + if !self.pass_enabled(pass.name()) { continue; } self.run_pass(&mut *pass)?; @@ -187,6 +213,7 @@ impl Minimizer { // The logic for bisecting down lives in PassController. let mut checker = PassController::new(self.options.clone()); + let mut initial_pass = true; loop { let mut change = file.try_change(changes)?; let (_, krate) = change.before_content(); @@ -203,24 +230,27 @@ impl Minimizer { if after.reproduces_issue() { change.commit(); checker.reproduces(); + if has_made_change == ProcessState::FileInvalidated { + invalidated_files.insert(file); + break; + } } else { change.rollback()?; checker.does_not_reproduce(); } - - if has_made_change == ProcessState::FileInvalidated { - invalidated_files.insert(file); - } } ProcessState::NoChange => { - if self.options.no_color { - info!("{file:?}: After {}: no changes", pass.name()); - } else { - info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow()); + if !initial_pass { + if self.options.no_color { + info!("{file:?}: After {}: no changes", pass.name()); + } else { + info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow()); + } } checker.no_change(); } } + initial_pass = false; if self.cancel.load(Ordering::SeqCst) { info!("Exiting early."); diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index 5361d15..8fda09b 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -2,17 +2,13 @@ use crate::build::Build; -use super::{files::Changes, tracking, Minimizer, Pass, PassController, ProcessState, SourceFile}; +use super::{Minimizer, Pass, PassController, ProcessState, SourceFile, files::Changes, tracking}; use anyhow::{Context, Result}; use proc_macro2::Span; use quote::ToTokens; -use rustfix::{diagnostics::Diagnostic, Suggestion}; -use std::{ - collections::{HashMap, HashSet}, - ops::Range, - path::{Path, PathBuf}, -}; -use syn::{visit_mut::VisitMut, ImplItem, Item}; +use rustfix::{Suggestion, diagnostics::Diagnostic}; +use std::{collections::HashMap, ops::Range, path::Path}; +use syn::{ImplItem, Item, visit_mut::VisitMut}; fn file_for_suggestion(suggestion: &Suggestion) -> &Path { Path::new(&suggestion.solutions[0].replacements[0].snippet.file_name) @@ -22,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions"; impl Minimizer { pub fn delete_dead_code(&mut self) -> Result<()> { - if self.pass_disabled(PASS_NAME) { + if !self.pass_enabled(PASS_NAME) { return Ok(()); } @@ -79,9 +75,17 @@ impl Minimizer { .copied() .cloned() .collect::>(); + if desired_suggestions.is_empty() { + continue; + } let result = rustfix::apply_suggestions(change.before_content().0, &desired_suggestions)?; + anyhow::ensure!( + result != change.before_content().0, + "Suggestions 'applied' but no changes made??" + ); + let result = syn::parse_file(&result).context("parsing file after rustfix")?; change.write(result)?; @@ -103,16 +107,11 @@ impl Minimizer { struct DeleteUnusedFunctions { diags: Vec, build: Build, - invalid: HashSet, } impl DeleteUnusedFunctions { fn new(build: Build, diags: Vec) -> Self { - DeleteUnusedFunctions { - diags, - build, - invalid: HashSet::new(), - } + DeleteUnusedFunctions { diags, build } } } @@ -120,7 +119,6 @@ impl Pass for DeleteUnusedFunctions { fn refresh_state(&mut self) -> Result<()> { let (diags, _) = self.build.get_diags().context("getting diagnostics")?; self.diags = diags; - self.invalid.clear(); Ok(()) } @@ -130,18 +128,9 @@ impl Pass for DeleteUnusedFunctions { file: &SourceFile, checker: &mut super::PassController, ) -> ProcessState { - assert!( - !self.invalid.contains(file.path_no_fs_interact()), - "processing with invalid state" - ); - let mut visitor = FindUnusedFunction::new(file, self.diags.iter(), checker); visitor.visit_file_mut(krate); - if visitor.process_state == ProcessState::FileInvalidated { - self.invalid.insert(file.path_no_fs_interact().to_owned()); - } - visitor.process_state } @@ -235,8 +224,12 @@ impl<'a> FindUnusedFunction<'a> { match span_matches { 0 => true, 1 => { - self.process_state = ProcessState::FileInvalidated; - !self.checker.can_process(&self.current_path) + if self.checker.can_process(&self.current_path) { + self.process_state = ProcessState::FileInvalidated; + !self.checker.can_process(&self.current_path) + } else { + true + } } _ => { panic!("multiple dead_code spans matched identifier: {span_matches}."); diff --git a/tests/full_tests.rs b/tests/full_tests.rs index db14af8..e335ea3 100644 --- a/tests/full_tests.rs +++ b/tests/full_tests.rs @@ -1,4 +1,4 @@ -use anyhow::{ensure, Result}; +use anyhow::{Result, ensure}; use std::process::Command; #[test] diff --git a/testsuite/src/lib.rs b/testsuite/src/lib.rs index 8c7e031..f841e5c 100644 --- a/testsuite/src/lib.rs +++ b/testsuite/src/lib.rs @@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<() flag.push(regression_checker_path); flag }); + cmd.arg("--bisect-delete-imports"); let minimize_roots = start_roots.join(",");