diff --git a/full-tests/reexports.rs b/full-tests/reexports.rs new file mode 100644 index 0000000..e60730b --- /dev/null +++ b/full-tests/reexports.rs @@ -0,0 +1,12 @@ + +use hello::{thingy, whatever}; +mod hello{ + pub fn thingy(){} + /// ~REQUIRE-DELETED whatever + pub fn whatever(){} +} + +fn main(){ + "~MINIMIZE-ROOT let x = thingy"; + let x = thingy(); +} diff --git a/full-tests/reexports2.rs b/full-tests/reexports2.rs new file mode 100644 index 0000000..ea1b6ea --- /dev/null +++ b/full-tests/reexports2.rs @@ -0,0 +1,15 @@ +mod A { + /// ~REQUIRE-DELETED S1 + pub struct S1; + pub struct S2; +} + +mod B { + use crate::A::{self, S1}; + pub use A::S2 as thingy; +} + +fn main() { + "~MINIMIZE-ROOT let x = B::thingy"; + let x = B::thingy; +} diff --git a/src/lib.rs b/src/lib.rs index dc8dd21..f297fef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ mod expand; use anyhow::{Context, Result}; use dylib_flag::RustFunction; -use processor::Minimizer; +use processor::{Minimizer, PassSelection}; use tracing::Level; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; @@ -83,7 +83,7 @@ pub struct Options { /// A comma-seperated list of passes that should be enabled. By default, all passes are enabled. #[arg(long)] - pub passes: Option, + pub passes: Option, /// A path to a script that is run to check whether code reproduces. When it exits with code 0, the /// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints. @@ -105,6 +105,10 @@ pub struct Options { #[arg(skip)] pub no_delete_functions: bool, + + /// Remove individual use statements manually, instead of relying on rustc lints output + #[arg(long)] + pub bisect_delete_imports: bool, } #[derive(Debug, Clone)] @@ -142,6 +146,7 @@ pub fn minimize(options: Options, stop: Arc) -> Result<()> { minimizer.run_passes([ passes::EverybodyLoops::default().boxed(), + passes::SplitUse::default().boxed(), passes::FieldDeleter::default().boxed(), passes::Privatize::default().boxed(), ])?; @@ -186,6 +191,7 @@ impl Default for Options { script_path_lints: None, ignore_file: Vec::new(), no_delete_functions: false, + bisect_delete_imports: false, } } } diff --git a/src/passes/item_deleter.rs b/src/passes/item_deleter.rs index 75cc9bf..c1f3b50 100644 --- a/src/passes/item_deleter.rs +++ b/src/passes/item_deleter.rs @@ -1,7 +1,8 @@ use quote::ToTokens; use syn::{ visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2, - ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature, + ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, + Signature, }; use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; @@ -73,9 +74,17 @@ impl<'a> Visitor<'a> { self.current_path.pop(); should_retain } + // We would hope for the unused imports pass to catch all of these + // but sadly that's not the case + Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => { + self.current_path.push(tree.to_token_stream().to_string()); + + let should_retain = self.should_retain_item(); + + self.current_path.pop(); + should_retain + } Item::ForeignMod(_) => true, - // We hope for the unused imports to show them all. - Item::Use(_) => true, Item::Verbatim(_) => true, _ => true, } diff --git a/src/passes/mod.rs b/src/passes/mod.rs index b17bb05..31fd014 100644 --- a/src/passes/mod.rs +++ b/src/passes/mod.rs @@ -2,8 +2,9 @@ mod everybody_loops; mod field_deleter; mod item_deleter; mod privatize; +mod split_use; pub use self::{ everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter, - privatize::Privatize, + privatize::Privatize, split_use::SplitUse, }; diff --git a/src/passes/split_use.rs b/src/passes/split_use.rs new file mode 100644 index 0000000..ee3f355 --- /dev/null +++ b/src/passes/split_use.rs @@ -0,0 +1,181 @@ +use std::ops::DerefMut; + +use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile}; +use quote::ToTokens; + +use syn::{visit_mut::VisitMut, Item, ItemUse, UseName, UsePath, UseRename, UseTree}; + +struct Visitor<'a> { + process_state: ProcessState, + current_path: Vec, + checker: &'a mut PassController, +} + +impl<'a> Visitor<'a> { + fn new(checker: &'a mut PassController) -> Self { + Self { + process_state: ProcessState::NoChange, + current_path: Vec::new(), + checker, + } + } + + // given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group", + // returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees. + fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec { + // It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead + // *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly + match tree { + UseTree::Path(p) => { + self.current_path.push(p.ident.to_string()); + + let out = self + .expand_use_groups(top, &p.tree) + .into_iter() + .map(|x| { + let mut new = p.clone(); + new.tree = Box::new(x); + UseTree::Path(new) + }) + .collect(); + + self.current_path.pop(); + out + } + UseTree::Group(g) => { + let new_trees = g + .items + .iter() + .map(|subtree| self.expand_use_groups(top, subtree)) + .flatten() + .collect::>(); + + self.current_path.push("{{group}}".to_string()); + let can_process = self.checker.can_process(&self.current_path); + self.current_path.pop(); + + if can_process { + self.process_state = ProcessState::Changed; + return new_trees; + } else { + // Do not expand the group. + // recreate the UseTree::Group item (but with new subtrees), and return a single-element list + let mut g = g.clone(); + g.items.clear(); + g.items.extend(new_trees); + return vec![syn::UseTree::Group(g)]; + } + } + _ => return vec![tree.clone()], + } + } + + fn visit_item_list(&mut self, items: &mut Vec) { + let mut pos = 0; // index into the `items` list + while pos < items.len() { + let item_use: ItemUse = { + match &items[pos] { + Item::Use(u) => u.clone(), + _ => { + pos += 1; // if it's not a `use`` - simply advance to the next item + continue; + } + } + }; + + let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree); + // decorate each of the UseTree with a `use` keyword (and any attributes inherited) + let new_uses = new_use_trees.into_iter().map(|x| { + let mut new = item_use.clone(); + new.tree = x; + trim_trailing_self(&mut new.tree); + syn::Item::Use(new) + }); + + let step = new_uses.len(); + // replace the old use with the new uses + items.splice(pos..pos + 1, new_uses); + pos += step; // do not process freshly inserted items + } + } +} + +// It is legal to write "use module::{self};", but not "use module::self;". +// If we do end up with the latter on our hands, convert it to "use module;" instead. +fn trim_trailing_self(use_tree: &mut UseTree) { + match use_tree { + UseTree::Path(UsePath { + tree: subtree, + ident: base_ident, + .. + }) => match subtree.deref_mut() { + UseTree::Name(UseName { ident: sub_ident }) => { + if sub_ident == "self" { + *use_tree = UseTree::Name(UseName { + ident: base_ident.clone(), + }); + return; + } + } + UseTree::Rename(syn::UseRename { + ident: sub_ident, + rename, + as_token, + }) => { + if sub_ident == "self" { + *use_tree = UseTree::Rename(UseRename { + ident: base_ident.clone(), + rename: rename.clone(), + as_token: as_token.clone(), + }); + return; + } + } + UseTree::Path(_) => trim_trailing_self(&mut *subtree), + _ => {} + }, + _ => {} + } +} + +impl VisitMut for Visitor<'_> { + fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) { + self.current_path.push(item_mod.ident.to_string()); + if let Some((_, items)) = &mut item_mod.content { + self.visit_item_list(items); + } + syn::visit_mut::visit_item_mod_mut(self, item_mod); + self.current_path.pop(); + } + fn visit_file_mut(&mut self, file: &mut syn::File) { + self.visit_item_list(&mut file.items); + syn::visit_mut::visit_file_mut(self, file); + } + + tracking!(visit_item_fn_mut); + tracking!(visit_impl_item_method_mut); + tracking!(visit_item_impl_mut); + tracking!(visit_field_mut); + tracking!(visit_item_struct_mut); + tracking!(visit_item_trait_mut); +} + +#[derive(Default)] +pub struct SplitUse {} + +impl Pass for SplitUse { + fn process_file( + &mut self, + krate: &mut syn::File, + _: &SourceFile, + checker: &mut PassController, + ) -> ProcessState { + let mut visitor = Visitor::new(checker); + visitor.visit_file_mut(krate); + visitor.process_state + } + + fn name(&self) -> &'static str { + "split-use" + } +} diff --git a/src/processor/checker.rs b/src/processor/checker.rs index e7da7a8..e279f77 100644 --- a/src/processor/checker.rs +++ b/src/processor/checker.rs @@ -174,15 +174,22 @@ impl PassController { if candidates.is_empty() { self.state = PassControllerState::Success; } else { - let current = mem::take(candidates) - .into_iter() - .collect::>(); + // We could just set `current=candidates; worklist=default()`, + // but we are doing a minor optimization to split overlapping items into separate tries, + // so as to reduce the number of bisection steps that yield literally no new info. + let layers = layer_candidates(mem::take(candidates)); + let mut worklist = Worklist::new(); + for layer in layers.into_iter().rev() { + // .rev() so that we add shorter paths last, and process them first + worklist.push(layer); + } + let current = worklist.pop().unwrap().into_iter().collect(); self.state = PassControllerState::Bisecting { committed: BTreeSet::new(), failed: BTreeSet::new(), current, - worklist: Worklist::new(), + worklist, }; } } @@ -261,3 +268,33 @@ fn split_owned, A: FromIterator, B: FromItera (first_half, second_half) } + +/// Split up a list of AstPath's into several sets such that none of those sets have +/// any of the AstPath's overlap. +/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set. +/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items +/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones. +fn layer_candidates(mut candidates: Vec) -> Vec> { + candidates.sort(); // this *should* put less-granular/shorter-path items first + let mut layers: Vec> = vec![]; + for candidate in candidates { + let mut appropriate_layer_no = None; + for (no, layer) in layers.iter().enumerate() { + if !layer + .iter() + .all(|known_path| candidate.has_prefix(known_path)) + { + appropriate_layer_no = Some(no); + break; + } + } + match appropriate_layer_no { + Some(no) => layers[no].push(candidate), + None => { + let new_layer = vec![candidate]; + layers.push(new_layer); + } + } + } + layers +} diff --git a/src/processor/mod.rs b/src/processor/mod.rs index f167bda..2e22d86 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -50,6 +50,31 @@ pub(crate) enum ProcessState { FileInvalidated, } +#[derive(Debug, Clone)] +pub enum PassSelection { + Enable(Vec), + Disable(Vec), +} +impl std::str::FromStr for PassSelection { + type Err = &'static str; + fn from_str(s: &str) -> std::result::Result { + let values = s.split(',').collect::>(); + let have_negative = values.iter().any(|v| v.starts_with("no-")); + if have_negative && !values.iter().all(|v| v.starts_with("no-")) { + return Err("Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)"); + } + let actual_values = values + .into_iter() + .map(|v| v.strip_prefix("no-").unwrap_or(v).to_string()) + .collect(); + if !have_negative { + Ok(PassSelection::Enable(actual_values)) + } else { + Ok(PassSelection::Disable(actual_values)) + } + } +} + #[derive(Debug)] pub(crate) struct Minimizer { files: Vec, @@ -59,13 +84,12 @@ pub(crate) struct Minimizer { } impl Minimizer { - fn pass_disabled(&self, name: &str) -> bool { - if let Some(passes) = &self.options.passes { - if !passes.split(",").any(|allowed| name == allowed) { - return true; - } + fn pass_enabled(&self, name: &str) -> bool { + match &self.options.passes { + None => true, + Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed), + Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden), } - false } pub(crate) fn new_glob_dir( @@ -131,7 +155,7 @@ impl Minimizer { inital_build.require_reproduction("Initial")?; for mut pass in passes { - if self.pass_disabled(pass.name()) { + if !self.pass_enabled(pass.name()) { continue; } self.run_pass(&mut *pass)?; diff --git a/src/processor/reaper.rs b/src/processor/reaper.rs index ea3991c..25e0567 100644 --- a/src/processor/reaper.rs +++ b/src/processor/reaper.rs @@ -18,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions"; impl Minimizer { pub fn delete_dead_code(&mut self) -> Result<()> { - if self.pass_disabled(PASS_NAME) { + if !self.pass_enabled(PASS_NAME) { return Ok(()); } diff --git a/testsuite/src/lib.rs b/testsuite/src/lib.rs index 8c7e031..f841e5c 100644 --- a/testsuite/src/lib.rs +++ b/testsuite/src/lib.rs @@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<() flag.push(regression_checker_path); flag }); + cmd.arg("--bisect-delete-imports"); let minimize_roots = start_roots.join(",");