Compare commits

...

8 commits

Author SHA1 Message Date
moxian
5a3fb95cc4
Merge ccb3612511 into cf39338b30 2025-03-31 11:07:38 +00:00
moxian
ccb3612511 Allow specifying NOT-passes on the command line 2025-03-31 04:07:29 -07:00
moxian
21a4843791 Actually, let's not bisect imports by default
I thought it was a good idea, but now that reaper is
not completely busted, i see that this is taking a bit
longer than i'd like.. It's easier to just run reaper again
2025-03-31 04:07:29 -07:00
moxian
2031a180b7 Properly handle imports ending in ::self 2025-03-31 04:07:29 -07:00
moxian
1e46e75c6e Teach item-deleter to delete imports.
Because reaper is finnicky and does not always
work for whatever reason.

The new test from a couple commits ago now passes.
2025-03-31 04:07:28 -07:00
moxian
a70aedada5 Add a split-use pass.
It transforms "use std::io::{Read, Write}" into
"use std::io::Read; use std::io::Write"

Which later allows to remove just precisely the statements
we do not need, and leave rest be.

The test from the last commit does not pass, but that
is seemingly to do with the test harness setup, since it
works fine locally.
2025-03-31 04:07:28 -07:00
moxian
fdf0066e58 Add a test for grouped reexports (failing) 2025-03-31 04:07:28 -07:00
moxian
33c32bc0e7 Minor optimization for now-overlapping paths
Naively, if we have items (1) foo, (2) foo::bar, (3) foo::baz,
we would try to first try to remove all three of them,
and then if that fails, try to remove just (1) and (2)

..except we should already know that that would fail since it still
includes (1) which covers all three items anyway.

So, try to be smarter about this and not do that.

This is achieved by avoiding ever putting both foo and foo::whatever
in the same try-set, as that gives no information on foo::whatever
regardless of if the check passes or fails
2025-03-31 04:07:19 -07:00
10 changed files with 304 additions and 18 deletions

12
full-tests/reexports.rs Normal file
View file

@ -0,0 +1,12 @@
use hello::{thingy, whatever};
mod hello{
pub fn thingy(){}
/// ~REQUIRE-DELETED whatever
pub fn whatever(){}
}
fn main(){
"~MINIMIZE-ROOT let x = thingy";
let x = thingy();
}

15
full-tests/reexports2.rs Normal file
View file

@ -0,0 +1,15 @@
mod A {
/// ~REQUIRE-DELETED S1
pub struct S1;
pub struct S2;
}
mod B {
use crate::A::{self, S1};
pub use A::S2 as thingy;
}
fn main() {
"~MINIMIZE-ROOT let x = B::thingy";
let x = B::thingy;
}

View file

@ -20,7 +20,7 @@ mod expand;
use anyhow::{Context, Result};
use dylib_flag::RustFunction;
use processor::Minimizer;
use processor::{Minimizer, PassSelection};
use tracing::Level;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry};
@ -83,7 +83,7 @@ pub struct Options {
/// A comma-seperated list of passes that should be enabled. By default, all passes are enabled.
#[arg(long)]
pub passes: Option<String>,
pub passes: Option<PassSelection>,
/// A path to a script that is run to check whether code reproduces. When it exits with code 0, the
/// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints.
@ -105,6 +105,10 @@ pub struct Options {
#[arg(skip)]
pub no_delete_functions: bool,
/// Remove individual use statements manually, instead of relying on rustc lints output
#[arg(long)]
pub bisect_delete_imports: bool,
}
#[derive(Debug, Clone)]
@ -142,6 +146,7 @@ pub fn minimize(options: Options, stop: Arc<AtomicBool>) -> Result<()> {
minimizer.run_passes([
passes::EverybodyLoops::default().boxed(),
passes::SplitUse::default().boxed(),
passes::FieldDeleter::default().boxed(),
passes::Privatize::default().boxed(),
])?;
@ -186,6 +191,7 @@ impl Default for Options {
script_path_lints: None,
ignore_file: Vec::new(),
no_delete_functions: false,
bisect_delete_imports: false,
}
}
}

View file

@ -1,7 +1,8 @@
use quote::ToTokens;
use syn::{
visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2,
ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature,
ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse,
Signature,
};
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
@ -73,9 +74,17 @@ impl<'a> Visitor<'a> {
self.current_path.pop();
should_retain
}
// We would hope for the unused imports pass to catch all of these
// but sadly that's not the case
Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => {
self.current_path.push(tree.to_token_stream().to_string());
let should_retain = self.should_retain_item();
self.current_path.pop();
should_retain
}
Item::ForeignMod(_) => true,
// We hope for the unused imports to show them all.
Item::Use(_) => true,
Item::Verbatim(_) => true,
_ => true,
}

View file

@ -2,8 +2,9 @@ mod everybody_loops;
mod field_deleter;
mod item_deleter;
mod privatize;
mod split_use;
pub use self::{
everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter,
privatize::Privatize,
privatize::Privatize, split_use::SplitUse,
};

181
src/passes/split_use.rs Normal file
View file

@ -0,0 +1,181 @@
use std::ops::DerefMut;
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
use quote::ToTokens;
use syn::{visit_mut::VisitMut, Item, ItemUse, UseName, UsePath, UseRename, UseTree};
struct Visitor<'a> {
process_state: ProcessState,
current_path: Vec<String>,
checker: &'a mut PassController,
}
impl<'a> Visitor<'a> {
fn new(checker: &'a mut PassController) -> Self {
Self {
process_state: ProcessState::NoChange,
current_path: Vec::new(),
checker,
}
}
// given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group",
// returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees.
fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec<UseTree> {
// It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead
// *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly
match tree {
UseTree::Path(p) => {
self.current_path.push(p.ident.to_string());
let out = self
.expand_use_groups(top, &p.tree)
.into_iter()
.map(|x| {
let mut new = p.clone();
new.tree = Box::new(x);
UseTree::Path(new)
})
.collect();
self.current_path.pop();
out
}
UseTree::Group(g) => {
let new_trees = g
.items
.iter()
.map(|subtree| self.expand_use_groups(top, subtree))
.flatten()
.collect::<Vec<_>>();
self.current_path.push("{{group}}".to_string());
let can_process = self.checker.can_process(&self.current_path);
self.current_path.pop();
if can_process {
self.process_state = ProcessState::Changed;
return new_trees;
} else {
// Do not expand the group.
// recreate the UseTree::Group item (but with new subtrees), and return a single-element list
let mut g = g.clone();
g.items.clear();
g.items.extend(new_trees);
return vec![syn::UseTree::Group(g)];
}
}
_ => return vec![tree.clone()],
}
}
fn visit_item_list(&mut self, items: &mut Vec<syn::Item>) {
let mut pos = 0; // index into the `items` list
while pos < items.len() {
let item_use: ItemUse = {
match &items[pos] {
Item::Use(u) => u.clone(),
_ => {
pos += 1; // if it's not a `use`` - simply advance to the next item
continue;
}
}
};
let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree);
// decorate each of the UseTree with a `use` keyword (and any attributes inherited)
let new_uses = new_use_trees.into_iter().map(|x| {
let mut new = item_use.clone();
new.tree = x;
trim_trailing_self(&mut new.tree);
syn::Item::Use(new)
});
let step = new_uses.len();
// replace the old use with the new uses
items.splice(pos..pos + 1, new_uses);
pos += step; // do not process freshly inserted items
}
}
}
// It is legal to write "use module::{self};", but not "use module::self;".
// If we do end up with the latter on our hands, convert it to "use module;" instead.
fn trim_trailing_self(use_tree: &mut UseTree) {
match use_tree {
UseTree::Path(UsePath {
tree: subtree,
ident: base_ident,
..
}) => match subtree.deref_mut() {
UseTree::Name(UseName { ident: sub_ident }) => {
if sub_ident == "self" {
*use_tree = UseTree::Name(UseName {
ident: base_ident.clone(),
});
return;
}
}
UseTree::Rename(syn::UseRename {
ident: sub_ident,
rename,
as_token,
}) => {
if sub_ident == "self" {
*use_tree = UseTree::Rename(UseRename {
ident: base_ident.clone(),
rename: rename.clone(),
as_token: as_token.clone(),
});
return;
}
}
UseTree::Path(_) => trim_trailing_self(&mut *subtree),
_ => {}
},
_ => {}
}
}
impl VisitMut for Visitor<'_> {
fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) {
self.current_path.push(item_mod.ident.to_string());
if let Some((_, items)) = &mut item_mod.content {
self.visit_item_list(items);
}
syn::visit_mut::visit_item_mod_mut(self, item_mod);
self.current_path.pop();
}
fn visit_file_mut(&mut self, file: &mut syn::File) {
self.visit_item_list(&mut file.items);
syn::visit_mut::visit_file_mut(self, file);
}
tracking!(visit_item_fn_mut);
tracking!(visit_impl_item_method_mut);
tracking!(visit_item_impl_mut);
tracking!(visit_field_mut);
tracking!(visit_item_struct_mut);
tracking!(visit_item_trait_mut);
}
#[derive(Default)]
pub struct SplitUse {}
impl Pass for SplitUse {
fn process_file(
&mut self,
krate: &mut syn::File,
_: &SourceFile,
checker: &mut PassController,
) -> ProcessState {
let mut visitor = Visitor::new(checker);
visitor.visit_file_mut(krate);
visitor.process_state
}
fn name(&self) -> &'static str {
"split-use"
}
}

View file

@ -174,15 +174,22 @@ impl PassController {
if candidates.is_empty() {
self.state = PassControllerState::Success;
} else {
let current = mem::take(candidates)
.into_iter()
.collect::<BTreeSet<AstPath>>();
// We could just set `current=candidates; worklist=default()`,
// but we are doing a minor optimization to split overlapping items into separate tries,
// so as to reduce the number of bisection steps that yield literally no new info.
let layers = layer_candidates(mem::take(candidates));
let mut worklist = Worklist::new();
for layer in layers.into_iter().rev() {
// .rev() so that we add shorter paths last, and process them first
worklist.push(layer);
}
let current = worklist.pop().unwrap().into_iter().collect();
self.state = PassControllerState::Bisecting {
committed: BTreeSet::new(),
failed: BTreeSet::new(),
current,
worklist: Worklist::new(),
worklist,
};
}
}
@ -261,3 +268,33 @@ fn split_owned<T, From: IntoIterator<Item = T>, A: FromIterator<T>, B: FromItera
(first_half, second_half)
}
/// Split up a list of AstPath's into several sets such that none of those sets have
/// any of the AstPath's overlap.
/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set.
/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items
/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones.
fn layer_candidates(mut candidates: Vec<AstPath>) -> Vec<Vec<AstPath>> {
candidates.sort(); // this *should* put less-granular/shorter-path items first
let mut layers: Vec<Vec<AstPath>> = vec![];
for candidate in candidates {
let mut appropriate_layer_no = None;
for (no, layer) in layers.iter().enumerate() {
if !layer
.iter()
.any(|known_path| candidate.has_prefix(known_path))
{
appropriate_layer_no = Some(no);
break;
}
}
match appropriate_layer_no {
Some(no) => layers[no].push(candidate),
None => {
let new_layer = vec![candidate];
layers.push(new_layer);
}
}
}
layers
}

View file

@ -50,6 +50,31 @@ pub(crate) enum ProcessState {
FileInvalidated,
}
#[derive(Debug, Clone)]
pub enum PassSelection {
Enable(Vec<String>),
Disable(Vec<String>),
}
impl std::str::FromStr for PassSelection {
type Err = &'static str;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
let values = s.split(',').collect::<Vec<_>>();
let have_negative = values.iter().any(|v| v.starts_with("no-"));
if have_negative && !values.iter().all(|v| v.starts_with("no-")) {
return Err("Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)");
}
let actual_values = values
.into_iter()
.map(|v| v.strip_prefix("no-").unwrap_or(v).to_string())
.collect();
if !have_negative {
Ok(PassSelection::Enable(actual_values))
} else {
Ok(PassSelection::Disable(actual_values))
}
}
}
#[derive(Debug)]
pub(crate) struct Minimizer {
files: Vec<SourceFile>,
@ -59,14 +84,13 @@ pub(crate) struct Minimizer {
}
impl Minimizer {
fn pass_disabled(&self, name: &str) -> bool {
if let Some(passes) = &self.options.passes {
if !passes.split(",").any(|allowed| name == allowed) {
return true;
fn pass_enabled(&self, name: &str) -> bool {
match &self.options.passes {
None => true,
Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed),
Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden),
}
}
false
}
pub(crate) fn new_glob_dir(
options: Options,
@ -131,7 +155,7 @@ impl Minimizer {
inital_build.require_reproduction("Initial")?;
for mut pass in passes {
if self.pass_disabled(pass.name()) {
if !self.pass_enabled(pass.name()) {
continue;
}
self.run_pass(&mut *pass)?;

View file

@ -18,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions";
impl Minimizer {
pub fn delete_dead_code(&mut self) -> Result<()> {
if self.pass_disabled(PASS_NAME) {
if !self.pass_enabled(PASS_NAME) {
return Ok(());
}

View file

@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<()
flag.push(regression_checker_path);
flag
});
cmd.arg("--bisect-delete-imports");
let minimize_roots = start_roots.join(",");