From 5d4605c8cc9c6129246ea4516ce9c5a6f9f6e50e Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 16:04:09 -0700 Subject: [PATCH 1/7] Minor optimization for now-overlapping paths Naively, if we have items (1) foo, (2) foo::bar, (3) foo::baz, we would try to first try to remove all three of them, and then if that fails, try to remove just (1) and (2) ..except we should already know that that would fail since it still includes (1) which covers all three items anyway. So, try to be smarter about this and not do that. This is achieved by avoiding ever putting both foo and foo::whatever in the same try-set, as that gives no information on foo::whatever regardless of if the check passes or fails --- src/processor/checker.rs | 45 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/processor/checker.rs b/src/processor/checker.rs index e7da7a8..e279f77 100644 --- a/src/processor/checker.rs +++ b/src/processor/checker.rs @@ -174,15 +174,22 @@ impl PassController { if candidates.is_empty() { self.state = PassControllerState::Success; } else { - let current = mem::take(candidates) - .into_iter() - .collect::>(); + // We could just set `current=candidates; worklist=default()`, + // but we are doing a minor optimization to split overlapping items into separate tries, + // so as to reduce the number of bisection steps that yield literally no new info. + let layers = layer_candidates(mem::take(candidates)); + let mut worklist = Worklist::new(); + for layer in layers.into_iter().rev() { + // .rev() so that we add shorter paths last, and process them first + worklist.push(layer); + } + let current = worklist.pop().unwrap().into_iter().collect(); self.state = PassControllerState::Bisecting { committed: BTreeSet::new(), failed: BTreeSet::new(), current, - worklist: Worklist::new(), + worklist, }; } } @@ -261,3 +268,33 @@ fn split_owned, A: FromIterator, B: FromItera (first_half, second_half) } + +/// Split up a list of AstPath's into several sets such that none of those sets have +/// any of the AstPath's overlap. +/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set. +/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items +/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones. +fn layer_candidates(mut candidates: Vec) -> Vec> { + candidates.sort(); // this *should* put less-granular/shorter-path items first + let mut layers: Vec> = vec![]; + for candidate in candidates { + let mut appropriate_layer_no = None; + for (no, layer) in layers.iter().enumerate() { + if !layer + .iter() + .all(|known_path| candidate.has_prefix(known_path)) + { + appropriate_layer_no = Some(no); + break; + } + } + match appropriate_layer_no { + Some(no) => layers[no].push(candidate), + None => { + let new_layer = vec![candidate]; + layers.push(new_layer); + } + } + } + layers +} From 963a1faf6964ffdf11457bf58104bf006a1a2010 Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 16:25:05 -0700 Subject: [PATCH 2/7] Add a test for grouped reexports (failing) --- full-tests/reexports.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 full-tests/reexports.rs diff --git a/full-tests/reexports.rs b/full-tests/reexports.rs new file mode 100644 index 0000000..e60730b --- /dev/null +++ b/full-tests/reexports.rs @@ -0,0 +1,12 @@ + +use hello::{thingy, whatever}; +mod hello{ + pub fn thingy(){} + /// ~REQUIRE-DELETED whatever + pub fn whatever(){} +} + +fn main(){ + "~MINIMIZE-ROOT let x = thingy"; + let x = thingy(); +} From 1f6cec7d7be0892d9075873db2645aff726006bf Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 18:32:52 -0700 Subject: [PATCH 3/7] Add a split-use pass. It transforms "use std::io::{Read, Write}" into "use std::io::Read; use std::io::Write" Which later allows to remove just precisely the statements we do not need, and leave rest be. The test from the last commit does not pass, but that is seemingly to do with the test harness setup, since it works fine locally. --- src/lib.rs | 1 + src/passes/mod.rs | 3 +- src/passes/split_use.rs | 139 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 src/passes/split_use.rs diff --git a/src/lib.rs b/src/lib.rs index dc8dd21..c5ce96c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -142,6 +142,7 @@ pub fn minimize(options: Options, stop: Arc) -> Result<()> { minimizer.run_passes([ passes::EverybodyLoops::default().boxed(), + passes::SplitUse::default().boxed(), passes::FieldDeleter::default().boxed(), passes::Privatize::default().boxed(), ])?; diff --git a/src/passes/mod.rs b/src/passes/mod.rs index b17bb05..31fd014 100644 --- a/src/passes/mod.rs +++ b/src/passes/mod.rs @@ -2,8 +2,9 @@ mod everybody_loops; mod field_deleter; mod item_deleter; mod privatize; +mod split_use; pub use self::{ everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter, - privatize::Privatize, + privatize::Privatize, split_use::SplitUse, }; diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs new file mode 100644 index 0000000..7d1e203 --- /dev/null +++ b/src/passes/split_use.rs @@ -0,0 +1,139 @@ +use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use quote::ToTokens; + +use syn::{visit_mut::VisitMut, Item, ItemUse}; + +struct Visitor<'a> { + process_state: ProcessState, + current_path: Vec, + checker: &'a mut PassController, +} + +impl<'a> Visitor<'a> { + fn new(checker: &'a mut PassController) -> Self { + Self { + process_state: ProcessState::NoChange, + current_path: Vec::new(), + checker, + } + } + + // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", + // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &syn::UseTree) -> Vec { + // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead + // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly + match tree { + syn::UseTree::Path(p) => { + self.current_path.push(p.ident.to_string()); + + let out = self + .expand_use_groups(top, &p.tree) + .into_iter() + .map(|x| { + let mut new = p.clone(); + new.tree = Box::new(x); + syn::UseTree::Path(new) + }) + .collect(); + + self.current_path.pop(); + out + } + syn::UseTree::Group(g) => { + let new_trees = g + .items + .iter() + .map(|subtree| self.expand_use_groups(top, subtree)) + .flatten() + .collect::>(); + + self.current_path.push("{{group}}".to_string()); + let can_process = self.checker.can_process(&self.current_path); + self.current_path.pop(); + if can_process { + self.process_state = ProcessState::Changed; + return new_trees; + } else { + // Do not expand the group. + // recreate the UseTree::Group item (but with new subtrees), and return a single-element list + let mut g = g.clone(); + g.items.clear(); + g.items.extend(new_trees); + return vec![syn::UseTree::Group(g)]; + } + } + _ => return vec![tree.clone()], + } + } + + fn visit_item_list(&mut self, items: &mut Vec) { + let mut pos = 0; // index into the `items` list + while pos < items.len() { + let item_use: ItemUse = { + match &items[pos] { + Item::Use(u) => u.clone(), + _ => { + pos += 1; // if it's not a `use`` - simply advance to the next item + continue; + } + } + }; + + let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree); + // decorate each of the UseTree with a `use` keyword (and any attributes inherited) + let new_uses = new_use_trees.into_iter().map(|x| { + let mut new = item_use.clone(); + new.tree = x; + syn::Item::Use(new) + }); + + let step = new_uses.len(); + // replace the old use with the new uses + items.splice(pos..pos + 1, new_uses); + pos += step; // do not process freshly inserted items + } + } +} + +impl VisitMut for Visitor<'_> { + fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { + self.current_path.push(item_mod.ident.to_string()); + if let Some((_, items)) = &mut item_mod.content { + self.visit_item_list(items); + } + syn::visit_mut::visit_item_mod_mut(self, item_mod); + self.current_path.pop(); + } + fn visit_file_mut(&mut self, file: &mut syn::File) { + self.visit_item_list(&mut file.items); + syn::visit_mut::visit_file_mut(self, file); + } + + tracking!(visit_item_fn_mut); + tracking!(visit_impl_item_method_mut); + tracking!(visit_item_impl_mut); + tracking!(visit_field_mut); + tracking!(visit_item_struct_mut); + tracking!(visit_item_trait_mut); +} + +#[derive(Default)] +pub struct SplitUse {} + +impl Pass for SplitUse { + fn process_file( + &mut self, + krate: &mut syn::File, + _: &SourceFile, + checker: &mut PassController, + ) -> ProcessState { + let mut visitor = Visitor::new(checker); + visitor.visit_file_mut(krate); + visitor.process_state + } + + fn name(&self) -> &'static str { + "split-use" + } +} From f7b1ea31eeeafd2990e019493b12ac7a31ebcc4d Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 18:37:55 -0700 Subject: [PATCH 4/7] Teach item-deleter to delete imports. Because reaper is finnicky and does not always work for whatever reason. The new test from a couple commits ago now passes. --- src/passes/item_deleter.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 75cc9bf..7537079 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -1,7 +1,8 @@ use quote::ToTokens; use syn::{ visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2, - ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature, + ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, + Signature, }; use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; @@ -73,9 +74,17 @@ impl<'a> Visitor<'a> { self.current_path.pop(); should_retain } + // We would hope for the unused imports pass to catch all of these + // but sadly that's not the case + Item::Use(ItemUse { tree, .. }) => { + self.current_path.push(tree.to_token_stream().to_string()); + + let should_retain = self.should_retain_item(); + + self.current_path.pop(); + should_retain + } Item::ForeignMod(_) => true, - // We hope for the unused imports to show them all. - Item::Use(_) => true, Item::Verbatim(_) => true, _ => true, } From 4af04f2191a33e56b9813be9e92f495a35487bec Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 20:34:20 -0700 Subject: [PATCH 5/7] Properly handle imports ending in ::self --- full-tests/reexports2.rs | 15 ++++++++++++ src/passes/split_use.rs | 52 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 full-tests/reexports2.rs diff --git a/full-tests/reexports2.rs b/full-tests/reexports2.rs new file mode 100644 index 0000000..ea1b6ea --- /dev/null +++ b/full-tests/reexports2.rs @@ -0,0 +1,15 @@ +mod A { + /// ~REQUIRE-DELETED S1 + pub struct S1; + pub struct S2; +} + +mod B { + use crate::A::{self, S1}; + pub use A::S2 as thingy; +} + +fn main() { + "~MINIMIZE-ROOT let x = B::thingy"; + let x = B::thingy; +} diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs index 7d1e203..ee3f355 100644 --- a/src/passes/split_use.rs +++ b/src/passes/split_use.rs @@ -1,7 +1,9 @@ +use std::ops::DerefMut; + use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; use quote::ToTokens; -use syn::{visit_mut::VisitMut, Item, ItemUse}; +use syn::{visit_mut::VisitMut, Item, ItemUse, UseName, UsePath, UseRename, UseTree}; struct Visitor<'a> { process_state: ProcessState, @@ -20,11 +22,11 @@ impl<'a> Visitor<'a> { // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. - fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &syn::UseTree) -> Vec { + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec { // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly match tree { - syn::UseTree::Path(p) => { + UseTree::Path(p) => { self.current_path.push(p.ident.to_string()); let out = self @@ -33,14 +35,14 @@ impl<'a> Visitor<'a> { .map(|x| { let mut new = p.clone(); new.tree = Box::new(x); - syn::UseTree::Path(new) + UseTree::Path(new) }) .collect(); self.current_path.pop(); out } - syn::UseTree::Group(g) => { + UseTree::Group(g) => { let new_trees = g .items .iter() @@ -51,6 +53,7 @@ impl<'a> Visitor<'a> { self.current_path.push("{{group}}".to_string()); let can_process = self.checker.can_process(&self.current_path); self.current_path.pop(); + if can_process { self.process_state = ProcessState::Changed; return new_trees; @@ -85,6 +88,7 @@ impl<'a> Visitor<'a> { let new_uses = new_use_trees.into_iter().map(|x| { let mut new = item_use.clone(); new.tree = x; + trim_trailing_self(&mut new.tree); syn::Item::Use(new) }); @@ -96,6 +100,44 @@ impl<'a> Visitor<'a> { } } +// It is legal to write "use module::{self};", but not "use module::self;". +// If we do end up with the latter on our hands, convert it to "use module;" instead. +fn trim_trailing_self(use_tree: &mut UseTree) { + match use_tree { + UseTree::Path(UsePath { + tree: subtree, + ident: base_ident, + .. + }) => match subtree.deref_mut() { + UseTree::Name(UseName { ident: sub_ident }) => { + if sub_ident == "self" { + *use_tree = UseTree::Name(UseName { + ident: base_ident.clone(), + }); + return; + } + } + UseTree::Rename(syn::UseRename { + ident: sub_ident, + rename, + as_token, + }) => { + if sub_ident == "self" { + *use_tree = UseTree::Rename(UseRename { + ident: base_ident.clone(), + rename: rename.clone(), + as_token: as_token.clone(), + }); + return; + } + } + UseTree::Path(_) => trim_trailing_self(&mut *subtree), + _ => {} + }, + _ => {} + } +} + impl VisitMut for Visitor<'_> { fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { self.current_path.push(item_mod.ident.to_string()); From 7b28f80cd6698f79c1572b794c01141aafaff8c5 Mon Sep 17 00:00:00 2001 From: moxian Date: Mon, 31 Mar 2025 03:35:10 -0700 Subject: [PATCH 6/7] Actually, let's not bisect imports by default I thought it was a good idea, but now that reaper is not completely busted, i see that this is taking a bit longer than i'd like.. It's easier to just run reaper again --- src/lib.rs | 5 +++++ src/passes/item_deleter.rs | 2 +- testsuite/src/lib.rs | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5ce96c..24ca3ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,6 +105,10 @@ pub struct Options { #[arg(skip)] pub no_delete_functions: bool, + + /// Remove individual use statements manually, instead of relying on rustc lints output + #[arg(long)] + pub bisect_delete_imports: bool, } #[derive(Debug, Clone)] @@ -187,6 +191,7 @@ impl Default for Options { script_path_lints: None, ignore_file: Vec::new(), no_delete_functions: false, + bisect_delete_imports: false, } } } diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 7537079..c1f3b50 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -76,7 +76,7 @@ impl<'a> Visitor<'a> { } // We would hope for the unused imports pass to catch all of these // but sadly that's not the case - Item::Use(ItemUse { tree, .. }) => { + Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => { self.current_path.push(tree.to_token_stream().to_string()); let should_retain = self.should_retain_item(); diff --git a/testsuite/src/lib.rs b/testsuite/src/lib.rs index 8c7e031..f841e5c 100644 --- a/testsuite/src/lib.rs +++ b/testsuite/src/lib.rs @@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<() flag.push(regression_checker_path); flag }); + cmd.arg("--bisect-delete-imports"); let minimize_roots = start_roots.join(","); From 6c67c7e12dfab5f081eb9fb3ec5d3e7f3b9911fc Mon Sep 17 00:00:00 2001 From: moxian Date: Sun, 30 Mar 2025 19:12:10 -0700 Subject: [PATCH 7/7] Allow specifying NOT-passes on the command line --- src/lib.rs | 4 ++-- src/processor/mod.rs | 38 +++++++++++++++++++++++++++++++------- src/processor/reaper.rs | 2 +- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 24ca3ba..f297fef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ mod expand; use anyhow::{Context, Result}; use dylib_flag::RustFunction; -use processor::Minimizer; +use processor::{Minimizer, PassSelection}; use tracing::Level; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; @@ -83,7 +83,7 @@ pub struct Options { /// A comma-seperated list of passes that should be enabled. By default, all passes are enabled. #[arg(long)] - pub passes: Option, + pub passes: Option, /// A path to a script that is run to check whether code reproduces. When it exits with code 0, the /// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints. diff --git a/src/processor/mod.rs b/src/processor/mod.rs index f167bda..2e22d86 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -50,6 +50,31 @@ pub(crate) enum ProcessState { FileInvalidated, } +#[derive(Debug, Clone)] +pub enum PassSelection { + Enable(Vec), + Disable(Vec), +} +impl std::str::FromStr for PassSelection { + type Err = &'static str; + fn from_str(s: &str) -> std::result::Result { + let values = s.split(',').collect::>(); + let have_negative = values.iter().any(|v| v.starts_with("no-")); + if have_negative && !values.iter().all(|v| v.starts_with("no-")) { + return Err("Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)"); + } + let actual_values = values + .into_iter() + .map(|v| v.strip_prefix("no-").unwrap_or(v).to_string()) + .collect(); + if !have_negative { + Ok(PassSelection::Enable(actual_values)) + } else { + Ok(PassSelection::Disable(actual_values)) + } + } +} + #[derive(Debug)] pub(crate) struct Minimizer { files: Vec, @@ -59,13 +84,12 @@ pub(crate) struct Minimizer { } impl Minimizer { - fn pass_disabled(&self, name: &str) -> bool { - if let Some(passes) = &self.options.passes { - if !passes.split(",").any(|allowed| name == allowed) { - return true; - } + fn pass_enabled(&self, name: &str) -> bool { + match &self.options.passes { + None => true, + Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed), + Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden), } - false } pub(crate) fn new_glob_dir( @@ -131,7 +155,7 @@ impl Minimizer { inital_build.require_reproduction("Initial")?; for mut pass in passes { - if self.pass_disabled(pass.name()) { + if !self.pass_enabled(pass.name()) { continue; } self.run_pass(&mut *pass)?; diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index ea3991c..25e0567 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -18,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions"; impl Minimizer { pub fn delete_dead_code(&mut self) -> Result<()> { - if self.pass_disabled(PASS_NAME) { + if !self.pass_enabled(PASS_NAME) { return Ok(()); }