From 440e2034f0e4e6bff80a57bc66c9d4302ce1fb01 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 22:25:37 -0700 Subject: [PATCH 01/15] Update install instructions --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5431238..ac0e04d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # cargo-minimize -Install with `cargo install --git https://github.com/Nilstrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`. +Install with `cargo install --git https://github.com/Noratrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`. -For more info, see the [cookbook](https://github.com/Nilstrieb/cargo-minimize#cookbook). +For more info, see the [cookbook](https://github.com/Noratrieb/cargo-minimize#cookbook). ## Idea From a787b0023dd7ca47ed80b7d8eac6bfd6a56d00bb Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 14:40:25 -0700 Subject: [PATCH 02/15] Pin markdown version to fix build error markdown made a breaking change between 1.0.0-alpha.20 and 1.0.0-alpha.21 which made genemichaels stop building. genemichaels never updated (as a library) to fix that, so if we want to continue using it we have to ping markdown. The error: error[E0599]: no variant or associated item named `BlockQuote` found for enum `Node` in the current scope --> [path-to]\genemichaels-0.1.21\src\comments.rs:633:15 | 633 | Node::BlockQuote(x) => { | ^^^^^^^^^^ variant or associated item not found in `Node` | help: there is a variant with a similar name | 633 | Node::Blockquote(x) => { | ~~~~~~~~~~ --- Cargo.lock | 1 + Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index ba05446..c164970 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,7 @@ dependencies = [ "ctrlc", "genemichaels", "libloading", + "markdown", "owo-colors", "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5aead06..6dcdd56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ anyhow = "1.0.65" clap = { version = "4.0.29", features = ["derive"] } ctrlc = "3.2.5" genemichaels = "0.1.21" +markdown = { version = "=1.0.0-alpha.14" } # pinning the version to ensure genemichaels builds. libloading = "0.8.0" owo-colors = "3.5.0" proc-macro2 = { version = "1.0.48", features = ["span-locations"] } From 1d46b44fcf881c2cde313c27bfece56647e97490 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 14:56:48 -0700 Subject: [PATCH 03/15] Better error message on failing to parse --verify-fn clap formats the FromStr::Err's arising from parsing the flags with the Display formatter. This is a very reasonable default but it also means it loses all the context, since anyhow::Error only prints the outermost error in Display. So any failure parsing/creating/loading the function is displayed to the user as simple "compiling and loading rust function", which is impossible to debug. Using the Debug formatting in impl FromStr for RustFunction helps work around this. --- src/dylib_flag.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dylib_flag.rs b/src/dylib_flag.rs index 229ea6d..1fca586 100644 --- a/src/dylib_flag.rs +++ b/src/dylib_flag.rs @@ -25,7 +25,8 @@ impl FromStr for RustFunction { type Err = anyhow::Error; fn from_str(s: &str) -> Result { - Self::compile(s).context("compiling and loading rust function") + Self::compile(s) + .map_err(|e| anyhow::format_err!("compiling and loading rust function: {:?}", e)) } } From b4e587506f5e7c426ac8692aa671d8217346156e Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 21:41:37 -0700 Subject: [PATCH 04/15] Fix delete-unused-functions panics The pass used to (?) track invalidated files itself, but now that functionality has been moved up one level, but also kinda not really. So here we clarify this by: - making reaper not care about tracking invalidated files anymore - making processor yes care about tracking invalidated files, and ensuring that it does not call process_file again after gettin ProcessState::FileInvalidated, as it advertizes to do. --- src/processor/mod.rs | 10 +++++----- src/processor/reaper.rs | 31 ++++++++----------------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/src/processor/mod.rs b/src/processor/mod.rs index 2843829..3b7dadc 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -19,7 +19,7 @@ pub(crate) trait Pass { /// Process a file. The state of the processor might get invalidated in the process as signaled with /// `ProcessState::FileInvalidated`. When a file is invalidated, the minimizer will call `Processor::refersh_state` - /// before calling the this function on the same file again. + /// before calling this function on the same file again. fn process_file( &mut self, krate: &mut syn::File, @@ -203,14 +203,14 @@ impl Minimizer { if after.reproduces_issue() { change.commit(); checker.reproduces(); + if has_made_change == ProcessState::FileInvalidated { + invalidated_files.insert(file); + break; + } } else { change.rollback()?; checker.does_not_reproduce(); } - - if has_made_change == ProcessState::FileInvalidated { - invalidated_files.insert(file); - } } ProcessState::NoChange => { if self.options.no_color { diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index 5361d15..e5953c9 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -7,11 +7,7 @@ use anyhow::{Context, Result}; use proc_macro2::Span; use quote::ToTokens; use rustfix::{diagnostics::Diagnostic, Suggestion}; -use std::{ - collections::{HashMap, HashSet}, - ops::Range, - path::{Path, PathBuf}, -}; +use std::{collections::HashMap, ops::Range, path::Path}; use syn::{visit_mut::VisitMut, ImplItem, Item}; fn file_for_suggestion(suggestion: &Suggestion) -> &Path { @@ -103,16 +99,11 @@ impl Minimizer { struct DeleteUnusedFunctions { diags: Vec, build: Build, - invalid: HashSet, } impl DeleteUnusedFunctions { fn new(build: Build, diags: Vec) -> Self { - DeleteUnusedFunctions { - diags, - build, - invalid: HashSet::new(), - } + DeleteUnusedFunctions { diags, build } } } @@ -120,7 +111,6 @@ impl Pass for DeleteUnusedFunctions { fn refresh_state(&mut self) -> Result<()> { let (diags, _) = self.build.get_diags().context("getting diagnostics")?; self.diags = diags; - self.invalid.clear(); Ok(()) } @@ -130,18 +120,9 @@ impl Pass for DeleteUnusedFunctions { file: &SourceFile, checker: &mut super::PassController, ) -> ProcessState { - assert!( - !self.invalid.contains(file.path_no_fs_interact()), - "processing with invalid state" - ); - let mut visitor = FindUnusedFunction::new(file, self.diags.iter(), checker); visitor.visit_file_mut(krate); - if visitor.process_state == ProcessState::FileInvalidated { - self.invalid.insert(file.path_no_fs_interact().to_owned()); - } - visitor.process_state } @@ -235,8 +216,12 @@ impl<'a> FindUnusedFunction<'a> { match span_matches { 0 => true, 1 => { - self.process_state = ProcessState::FileInvalidated; - !self.checker.can_process(&self.current_path) + if self.checker.can_process(&self.current_path) { + self.process_state = ProcessState::FileInvalidated; + !self.checker.can_process(&self.current_path) + } else { + true + } } _ => { panic!("multiple dead_code spans matched identifier: {span_matches}."); From 78a3cec9d6d8188207107b172e687458b5887a44 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 22:20:02 -0700 Subject: [PATCH 05/15] Minor reaper opti: don't do rustfix with no suggestions --- src/processor/reaper.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index e5953c9..ea3991c 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -75,9 +75,17 @@ impl Minimizer { .copied() .cloned() .collect::>(); + if desired_suggestions.is_empty() { + continue; + } let result = rustfix::apply_suggestions(change.before_content().0, &desired_suggestions)?; + anyhow::ensure!( + result != change.before_content().0, + "Suggestions 'applied' but no changes made??" + ); + let result = syn::parse_file(&result).context("parsing file after rustfix")?; change.write(result)?; From 7e3dc837a4e1690ea8e899a5c51325d8d634ce2f Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 22:37:54 -0700 Subject: [PATCH 06/15] Allow bisecting privatizer use changes Currently all the use items have the same AstPath, which means that privatizer tries to change all of them at once. Which means that if *any* of them can't get privated, then *all* of them stay unprivated, with all the consequences.. --- src/passes/privatize.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/passes/privatize.rs b/src/passes/privatize.rs index c2aa30d..749237f 100644 --- a/src/passes/privatize.rs +++ b/src/passes/privatize.rs @@ -24,12 +24,32 @@ impl<'a> Visitor<'a> { impl VisitMut for Visitor<'_> { fn visit_visibility_mut(&mut self, vis: &mut Visibility) { if let Visibility::Public(_) = vis { + self.current_path.push("{{vis}}".to_string()); if self.checker.can_process(&self.current_path) { self.process_state = ProcessState::Changed; *vis = self.pub_crate.clone(); } + self.current_path.pop(); } } + fn visit_item_mut(&mut self, item: &mut syn::Item) { + match item { + syn::Item::Use(u) => { + if let Visibility::Public(_) = u.vis { + let mut path = self.current_path.clone(); + path.push(u.to_token_stream().to_string()); + if self.checker.can_process(&path) { + self.process_state = ProcessState::Changed; + u.vis = self.pub_crate.clone(); + } + path.pop(); + } + return; // early return; do not walk the child items + } + _ => {} + } + syn::visit_mut::visit_item_mut(self, item); + } tracking!(); } From fdfa1bed09e130825d66e8725df2dbd5d44be275 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 15:15:35 -0700 Subject: [PATCH 07/15] Add a test for nested items removal (currently failing) --- full-tests/nested-items.rs | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 full-tests/nested-items.rs diff --git a/full-tests/nested-items.rs b/full-tests/nested-items.rs new file mode 100644 index 0000000..3f71f42 --- /dev/null +++ b/full-tests/nested-items.rs @@ -0,0 +1,8 @@ +pub mod foo { + /// ~MINIMIZE-ROOT good + pub fn good(){} + /// ~REQUIRE-DELETED bad + pub fn bad(){} +} +/// ~MINIMIZE-ROOT main +fn main(){} From f38632d83e0dc3cbf215401645af6cbfdfa8d2eb Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 15:27:01 -0700 Subject: [PATCH 08/15] Touch up controller logic to handle nested items well. The test in the previous commit now passes --- full-tests/nested-items2.rs | 24 +++++++++++++ src/processor/checker.rs | 68 ++++++++++++++++++++++++------------- src/processor/mod.rs | 12 ++++--- 3 files changed, 77 insertions(+), 27 deletions(-) create mode 100644 full-tests/nested-items2.rs diff --git a/full-tests/nested-items2.rs b/full-tests/nested-items2.rs new file mode 100644 index 0000000..ed02dda --- /dev/null +++ b/full-tests/nested-items2.rs @@ -0,0 +1,24 @@ +// this should all get deleted in a single swoop *and* not panic about it +/// ~REQUIRE-DELETED l1 +mod l1 { + mod l2 { + mod l3 { + mod l4{ + mod l5 { + fn foo(){} + fn bar(){} + mod l6 { + fn x1(){} + } + fn x2(){} + } + } + mod l4_2 { + fn y(){} + } + } + } + fn x8(){} +} +/// ~MINIMIZE-ROOT main +fn main(){} diff --git a/src/processor/checker.rs b/src/processor/checker.rs index 748fda2..e7da7a8 100644 --- a/src/processor/checker.rs +++ b/src/processor/checker.rs @@ -6,6 +6,14 @@ use self::worklist::Worklist; #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] struct AstPath(Vec); +impl AstPath { + fn has_prefix(&self, other: &AstPath) -> bool { + if self.0.len() < other.0.len() { + return false; + } + std::iter::zip(self.0.iter(), other.0.iter()).all(|(a, b)| a == b) + } +} impl Borrow<[String]> for AstPath { fn borrow(&self) -> &[String] { @@ -75,6 +83,20 @@ mod worklist { pub(super) fn pop(&mut self) -> Option> { self.0.pop() } + + // remove all the worklist items that would have been covered by the + // given ones. + // I.e. if we have already deleted the entire module, there's no need + // trying to delete that module's individual items anymore + pub(super) fn prune(&mut self, things: &std::collections::BTreeSet) { + for wl in &mut self.0 { + wl.retain(|path| { + // retain only if none of the things are a prefix of this path + things.iter().all(|thing| !path.has_prefix(thing)) + }) + } + self.0.retain(|wl| !wl.is_empty()); + } } } @@ -97,8 +119,9 @@ impl PassController { committed, failed: _, current, - worklist: _, + worklist, } => { + worklist.prune(current); committed.extend(mem::take(current)); self.next_in_worklist(); @@ -110,19 +133,8 @@ impl PassController { /// The changes did not reproduce the regression. Bisect further. pub fn does_not_reproduce(&mut self) { match &mut self.state { - PassControllerState::InitialCollection { candidates } => { - // Applying them all was too much, let's bisect! - let (current, first_worklist_item) = split_owned(mem::take(candidates)); - - let mut worklist = Worklist::new(); - worklist.push(first_worklist_item); - - self.state = PassControllerState::Bisecting { - committed: BTreeSet::new(), - failed: BTreeSet::new(), - current, - worklist, - }; + PassControllerState::InitialCollection { candidates: _ } => { + unreachable!("we should have made no changes on initial collection, what do you mean it does not reproduce?!?") } PassControllerState::Bisecting { committed, @@ -155,15 +167,24 @@ impl PassController { } } - /// The pass did not apply any changes. We're done. + /// The pass did not apply any changes. We're either done or just starting pub fn no_change(&mut self) { - match &self.state { + match &mut self.state { PassControllerState::InitialCollection { candidates } => { - assert!( - candidates.is_empty(), - "No change but received candidates. The responsible pass does not seem to track the ProcessState correctly: {candidates:?}" - ); - self.state = PassControllerState::Success; + if candidates.is_empty() { + self.state = PassControllerState::Success; + } else { + let current = mem::take(candidates) + .into_iter() + .collect::>(); + + self.state = PassControllerState::Bisecting { + committed: BTreeSet::new(), + failed: BTreeSet::new(), + current, + worklist: Worklist::new(), + }; + } } PassControllerState::Bisecting { current, .. } => { unreachable!("Pass said it didn't change anything in the bisection phase, nils forgot what this means: {current:?}"); @@ -184,9 +205,9 @@ impl PassController { pub fn can_process(&mut self, path: &[String]) -> bool { match &mut self.state { PassControllerState::InitialCollection { candidates } => { - // For the initial collection, we collect the candidate and apply them all. + // For the initial collection, we collect the candidate but don't apply anything candidates.push(AstPath(path.to_owned())); - true + false } PassControllerState::Bisecting { current, .. } => current.contains(path), PassControllerState::Success { .. } => { @@ -205,6 +226,7 @@ impl PassController { match worklist.pop() { Some(next) => { *current = next.into_iter().collect(); + trace!(?current, "current working set: "); } None => { self.state = PassControllerState::Success; diff --git a/src/processor/mod.rs b/src/processor/mod.rs index 3b7dadc..f167bda 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -187,6 +187,7 @@ impl Minimizer { // The logic for bisecting down lives in PassController. let mut checker = PassController::new(self.options.clone()); + let mut initial_pass = true; loop { let mut change = file.try_change(changes)?; let (_, krate) = change.before_content(); @@ -213,14 +214,17 @@ impl Minimizer { } } ProcessState::NoChange => { - if self.options.no_color { - info!("{file:?}: After {}: no changes", pass.name()); - } else { - info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow()); + if !initial_pass { + if self.options.no_color { + info!("{file:?}: After {}: no changes", pass.name()); + } else { + info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow()); + } } checker.no_change(); } } + initial_pass = false; if self.cancel.load(Ordering::SeqCst) { info!("Exiting early."); From 33c32bc0e70f3333441829ab7d884d2d3373a48f Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 16:04:09 -0700 Subject: [PATCH 09/15] Minor optimization for now-overlapping paths Naively, if we have items (1) foo, (2) foo::bar, (3) foo::baz, we would try to first try to remove all three of them, and then if that fails, try to remove just (1) and (2) ..except we should already know that that would fail since it still includes (1) which covers all three items anyway. So, try to be smarter about this and not do that. This is achieved by avoiding ever putting both foo and foo::whatever in the same try-set, as that gives no information on foo::whatever regardless of if the check passes or fails --- src/processor/checker.rs | 45 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/processor/checker.rs b/src/processor/checker.rs index e7da7a8..f8a7d45 100644 --- a/src/processor/checker.rs +++ b/src/processor/checker.rs @@ -174,15 +174,22 @@ impl PassController { if candidates.is_empty() { self.state = PassControllerState::Success; } else { - let current = mem::take(candidates) - .into_iter() - .collect::>(); + // We could just set `current=candidates; worklist=default()`, + // but we are doing a minor optimization to split overlapping items into separate tries, + // so as to reduce the number of bisection steps that yield literally no new info. + let layers = layer_candidates(mem::take(candidates)); + let mut worklist = Worklist::new(); + for layer in layers.into_iter().rev() { + // .rev() so that we add shorter paths last, and process them first + worklist.push(layer); + } + let current = worklist.pop().unwrap().into_iter().collect(); self.state = PassControllerState::Bisecting { committed: BTreeSet::new(), failed: BTreeSet::new(), current, - worklist: Worklist::new(), + worklist, }; } } @@ -261,3 +268,33 @@ fn split_owned, A: FromIterator, B: FromItera (first_half, second_half) } + +/// Split up a list of AstPath's into several sets such that none of those sets have +/// any of the AstPath's overlap. +/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set. +/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items +/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones. +fn layer_candidates(mut candidates: Vec) -> Vec> { + candidates.sort(); // this *should* put less-granular/shorter-path items first + let mut layers: Vec> = vec![]; + for candidate in candidates { + let mut appropriate_layer_no = None; + for (no, layer) in layers.iter().enumerate() { + if !layer + .iter() + .any(|known_path| candidate.has_prefix(known_path)) + { + appropriate_layer_no = Some(no); + break; + } + } + match appropriate_layer_no { + Some(no) => layers[no].push(candidate), + None => { + let new_layer = vec![candidate]; + layers.push(new_layer); + } + } + } + layers +} From fdf0066e58f4dacfc3e821bddad52d3d65cb1653 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 16:25:05 -0700 Subject: [PATCH 10/15] Add a test for grouped reexports (failing) --- full-tests/reexports.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 full-tests/reexports.rs diff --git a/full-tests/reexports.rs b/full-tests/reexports.rs new file mode 100644 index 0000000..e60730b --- /dev/null +++ b/full-tests/reexports.rs @@ -0,0 +1,12 @@ + +use hello::{thingy, whatever}; +mod hello{ + pub fn thingy(){} + /// ~REQUIRE-DELETED whatever + pub fn whatever(){} +} + +fn main(){ + "~MINIMIZE-ROOT let x = thingy"; + let x = thingy(); +} From a70aedada56b6734ef1e5378ee66c152ea54218c Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 18:32:52 -0700 Subject: [PATCH 11/15] Add a split-use pass. It transforms "use std::io::{Read, Write}" into "use std::io::Read; use std::io::Write" Which later allows to remove just precisely the statements we do not need, and leave rest be. The test from the last commit does not pass, but that is seemingly to do with the test harness setup, since it works fine locally. --- src/lib.rs | 1 + src/passes/mod.rs | 3 +- src/passes/split_use.rs | 139 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 src/passes/split_use.rs diff --git a/src/lib.rs b/src/lib.rs index dc8dd21..c5ce96c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -142,6 +142,7 @@ pub fn minimize(options: Options, stop: Arc) -> Result<()> { minimizer.run_passes([ passes::EverybodyLoops::default().boxed(), + passes::SplitUse::default().boxed(), passes::FieldDeleter::default().boxed(), passes::Privatize::default().boxed(), ])?; diff --git a/src/passes/mod.rs b/src/passes/mod.rs index b17bb05..31fd014 100644 --- a/src/passes/mod.rs +++ b/src/passes/mod.rs @@ -2,8 +2,9 @@ mod everybody_loops; mod field_deleter; mod item_deleter; mod privatize; +mod split_use; pub use self::{ everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter, - privatize::Privatize, + privatize::Privatize, split_use::SplitUse, }; diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs new file mode 100644 index 0000000..7d1e203 --- /dev/null +++ b/src/passes/split_use.rs @@ -0,0 +1,139 @@ +use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use quote::ToTokens; + +use syn::{visit_mut::VisitMut, Item, ItemUse}; + +struct Visitor<'a> { + process_state: ProcessState, + current_path: Vec, + checker: &'a mut PassController, +} + +impl<'a> Visitor<'a> { + fn new(checker: &'a mut PassController) -> Self { + Self { + process_state: ProcessState::NoChange, + current_path: Vec::new(), + checker, + } + } + + // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", + // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &syn::UseTree) -> Vec { + // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead + // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly + match tree { + syn::UseTree::Path(p) => { + self.current_path.push(p.ident.to_string()); + + let out = self + .expand_use_groups(top, &p.tree) + .into_iter() + .map(|x| { + let mut new = p.clone(); + new.tree = Box::new(x); + syn::UseTree::Path(new) + }) + .collect(); + + self.current_path.pop(); + out + } + syn::UseTree::Group(g) => { + let new_trees = g + .items + .iter() + .map(|subtree| self.expand_use_groups(top, subtree)) + .flatten() + .collect::>(); + + self.current_path.push("{{group}}".to_string()); + let can_process = self.checker.can_process(&self.current_path); + self.current_path.pop(); + if can_process { + self.process_state = ProcessState::Changed; + return new_trees; + } else { + // Do not expand the group. + // recreate the UseTree::Group item (but with new subtrees), and return a single-element list + let mut g = g.clone(); + g.items.clear(); + g.items.extend(new_trees); + return vec![syn::UseTree::Group(g)]; + } + } + _ => return vec![tree.clone()], + } + } + + fn visit_item_list(&mut self, items: &mut Vec) { + let mut pos = 0; // index into the `items` list + while pos < items.len() { + let item_use: ItemUse = { + match &items[pos] { + Item::Use(u) => u.clone(), + _ => { + pos += 1; // if it's not a `use`` - simply advance to the next item + continue; + } + } + }; + + let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree); + // decorate each of the UseTree with a `use` keyword (and any attributes inherited) + let new_uses = new_use_trees.into_iter().map(|x| { + let mut new = item_use.clone(); + new.tree = x; + syn::Item::Use(new) + }); + + let step = new_uses.len(); + // replace the old use with the new uses + items.splice(pos..pos + 1, new_uses); + pos += step; // do not process freshly inserted items + } + } +} + +impl VisitMut for Visitor<'_> { + fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { + self.current_path.push(item_mod.ident.to_string()); + if let Some((_, items)) = &mut item_mod.content { + self.visit_item_list(items); + } + syn::visit_mut::visit_item_mod_mut(self, item_mod); + self.current_path.pop(); + } + fn visit_file_mut(&mut self, file: &mut syn::File) { + self.visit_item_list(&mut file.items); + syn::visit_mut::visit_file_mut(self, file); + } + + tracking!(visit_item_fn_mut); + tracking!(visit_impl_item_method_mut); + tracking!(visit_item_impl_mut); + tracking!(visit_field_mut); + tracking!(visit_item_struct_mut); + tracking!(visit_item_trait_mut); +} + +#[derive(Default)] +pub struct SplitUse {} + +impl Pass for SplitUse { + fn process_file( + &mut self, + krate: &mut syn::File, + _: &SourceFile, + checker: &mut PassController, + ) -> ProcessState { + let mut visitor = Visitor::new(checker); + visitor.visit_file_mut(krate); + visitor.process_state + } + + fn name(&self) -> &'static str { + "split-use" + } +} From 1e46e75c6e8ba52a3a00e7823c4002074a7c45fb Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 18:37:55 -0700 Subject: [PATCH 12/15] Teach item-deleter to delete imports. Because reaper is finnicky and does not always work for whatever reason. The new test from a couple commits ago now passes. --- src/passes/item_deleter.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 75cc9bf..7537079 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -1,7 +1,8 @@ use quote::ToTokens; use syn::{ visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2, - ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature, + ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, + Signature, }; use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; @@ -73,9 +74,17 @@ impl<'a> Visitor<'a> { self.current_path.pop(); should_retain } + // We would hope for the unused imports pass to catch all of these + // but sadly that's not the case + Item::Use(ItemUse { tree, .. }) => { + self.current_path.push(tree.to_token_stream().to_string()); + + let should_retain = self.should_retain_item(); + + self.current_path.pop(); + should_retain + } Item::ForeignMod(_) => true, - // We hope for the unused imports to show them all. - Item::Use(_) => true, Item::Verbatim(_) => true, _ => true, } From 2031a180b78dd42d403342d1542b04119dfba774 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 20:34:20 -0700 Subject: [PATCH 13/15] Properly handle imports ending in ::self --- full-tests/reexports2.rs | 15 ++++++++++++ src/passes/split_use.rs | 52 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 full-tests/reexports2.rs diff --git a/full-tests/reexports2.rs b/full-tests/reexports2.rs new file mode 100644 index 0000000..ea1b6ea --- /dev/null +++ b/full-tests/reexports2.rs @@ -0,0 +1,15 @@ +mod A { + /// ~REQUIRE-DELETED S1 + pub struct S1; + pub struct S2; +} + +mod B { + use crate::A::{self, S1}; + pub use A::S2 as thingy; +} + +fn main() { + "~MINIMIZE-ROOT let x = B::thingy"; + let x = B::thingy; +} diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs index 7d1e203..ee3f355 100644 --- a/src/passes/split_use.rs +++ b/src/passes/split_use.rs @@ -1,7 +1,9 @@ +use std::ops::DerefMut; + use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; use quote::ToTokens; -use syn::{visit_mut::VisitMut, Item, ItemUse}; +use syn::{visit_mut::VisitMut, Item, ItemUse, UseName, UsePath, UseRename, UseTree}; struct Visitor<'a> { process_state: ProcessState, @@ -20,11 +22,11 @@ impl<'a> Visitor<'a> { // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. - fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &syn::UseTree) -> Vec { + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec { // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly match tree { - syn::UseTree::Path(p) => { + UseTree::Path(p) => { self.current_path.push(p.ident.to_string()); let out = self @@ -33,14 +35,14 @@ impl<'a> Visitor<'a> { .map(|x| { let mut new = p.clone(); new.tree = Box::new(x); - syn::UseTree::Path(new) + UseTree::Path(new) }) .collect(); self.current_path.pop(); out } - syn::UseTree::Group(g) => { + UseTree::Group(g) => { let new_trees = g .items .iter() @@ -51,6 +53,7 @@ impl<'a> Visitor<'a> { self.current_path.push("{{group}}".to_string()); let can_process = self.checker.can_process(&self.current_path); self.current_path.pop(); + if can_process { self.process_state = ProcessState::Changed; return new_trees; @@ -85,6 +88,7 @@ impl<'a> Visitor<'a> { let new_uses = new_use_trees.into_iter().map(|x| { let mut new = item_use.clone(); new.tree = x; + trim_trailing_self(&mut new.tree); syn::Item::Use(new) }); @@ -96,6 +100,44 @@ impl<'a> Visitor<'a> { } } +// It is legal to write "use module::{self};", but not "use module::self;". +// If we do end up with the latter on our hands, convert it to "use module;" instead. +fn trim_trailing_self(use_tree: &mut UseTree) { + match use_tree { + UseTree::Path(UsePath { + tree: subtree, + ident: base_ident, + .. + }) => match subtree.deref_mut() { + UseTree::Name(UseName { ident: sub_ident }) => { + if sub_ident == "self" { + *use_tree = UseTree::Name(UseName { + ident: base_ident.clone(), + }); + return; + } + } + UseTree::Rename(syn::UseRename { + ident: sub_ident, + rename, + as_token, + }) => { + if sub_ident == "self" { + *use_tree = UseTree::Rename(UseRename { + ident: base_ident.clone(), + rename: rename.clone(), + as_token: as_token.clone(), + }); + return; + } + } + UseTree::Path(_) => trim_trailing_self(&mut *subtree), + _ => {} + }, + _ => {} + } +} + impl VisitMut for Visitor<'_> { fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { self.current_path.push(item_mod.ident.to_string()); From 21a484379159761d9b43848853e63c3c5855dac8 Mon Sep 17 00:00:00 2001 From: moxian Date: Mon, 31 Mar 2025 03:35:10 -0700 Subject: [PATCH 14/15] Actually, let's not bisect imports by default I thought it was a good idea, but now that reaper is not completely busted, i see that this is taking a bit longer than i'd like.. It's easier to just run reaper again --- src/lib.rs | 5 +++++ src/passes/item_deleter.rs | 2 +- testsuite/src/lib.rs | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5ce96c..24ca3ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,6 +105,10 @@ pub struct Options { #[arg(skip)] pub no_delete_functions: bool, + + /// Remove individual use statements manually, instead of relying on rustc lints output + #[arg(long)] + pub bisect_delete_imports: bool, } #[derive(Debug, Clone)] @@ -187,6 +191,7 @@ impl Default for Options { script_path_lints: None, ignore_file: Vec::new(), no_delete_functions: false, + bisect_delete_imports: false, } } } diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 7537079..c1f3b50 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -76,7 +76,7 @@ impl<'a> Visitor<'a> { } // We would hope for the unused imports pass to catch all of these // but sadly that's not the case - Item::Use(ItemUse { tree, .. }) => { + Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => { self.current_path.push(tree.to_token_stream().to_string()); let should_retain = self.should_retain_item(); diff --git a/testsuite/src/lib.rs b/testsuite/src/lib.rs index 8c7e031..f841e5c 100644 --- a/testsuite/src/lib.rs +++ b/testsuite/src/lib.rs @@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<() flag.push(regression_checker_path); flag }); + cmd.arg("--bisect-delete-imports"); let minimize_roots = start_roots.join(","); From ccb361251130b85efb0177fa412bbfa84052eb23 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 19:12:10 -0700 Subject: [PATCH 15/15] Allow specifying NOT-passes on the command line --- src/lib.rs | 4 ++-- src/processor/mod.rs | 38 +++++++++++++++++++++++++++++++------- src/processor/reaper.rs | 2 +- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 24ca3ba..f297fef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ mod expand; use anyhow::{Context, Result}; use dylib_flag::RustFunction; -use processor::Minimizer; +use processor::{Minimizer, PassSelection}; use tracing::Level; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; @@ -83,7 +83,7 @@ pub struct Options { /// A comma-seperated list of passes that should be enabled. By default, all passes are enabled. #[arg(long)] - pub passes: Option, + pub passes: Option, /// A path to a script that is run to check whether code reproduces. When it exits with code 0, the /// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints. diff --git a/src/processor/mod.rs b/src/processor/mod.rs index f167bda..2e22d86 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -50,6 +50,31 @@ pub(crate) enum ProcessState { FileInvalidated, } +#[derive(Debug, Clone)] +pub enum PassSelection { + Enable(Vec), + Disable(Vec), +} +impl std::str::FromStr for PassSelection { + type Err = &'static str; + fn from_str(s: &str) -> std::result::Result { + let values = s.split(',').collect::>(); + let have_negative = values.iter().any(|v| v.starts_with("no-")); + if have_negative && !values.iter().all(|v| v.starts_with("no-")) { + return Err("Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)"); + } + let actual_values = values + .into_iter() + .map(|v| v.strip_prefix("no-").unwrap_or(v).to_string()) + .collect(); + if !have_negative { + Ok(PassSelection::Enable(actual_values)) + } else { + Ok(PassSelection::Disable(actual_values)) + } + } +} + #[derive(Debug)] pub(crate) struct Minimizer { files: Vec, @@ -59,13 +84,12 @@ pub(crate) struct Minimizer { } impl Minimizer { - fn pass_disabled(&self, name: &str) -> bool { - if let Some(passes) = &self.options.passes { - if !passes.split(",").any(|allowed| name == allowed) { - return true; - } + fn pass_enabled(&self, name: &str) -> bool { + match &self.options.passes { + None => true, + Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed), + Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden), } - false } pub(crate) fn new_glob_dir( @@ -131,7 +155,7 @@ impl Minimizer { inital_build.require_reproduction("Initial")?; for mut pass in passes { - if self.pass_disabled(pass.name()) { + if !self.pass_enabled(pass.name()) { continue; } self.run_pass(&mut *pass)?; diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index ea3991c..25e0567 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -18,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions"; impl Minimizer { pub fn delete_dead_code(&mut self) -> Result<()> { - if self.pass_disabled(PASS_NAME) { + if !self.pass_enabled(PASS_NAME) { return Ok(()); }