This commit is contained in:
moxian 2025-03-31 11:07:38 +00:00 committed by GitHub
commit 5a3fb95cc4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 432 additions and 72 deletions

1
Cargo.lock generated
View file

@ -102,6 +102,7 @@ dependencies = [
"ctrlc",
"genemichaels",
"libloading",
"markdown",
"owo-colors",
"proc-macro2",
"quote",

View file

@ -24,6 +24,7 @@ anyhow = "1.0.65"
clap = { version = "4.0.29", features = ["derive"] }
ctrlc = "3.2.5"
genemichaels = "0.1.21"
markdown = { version = "=1.0.0-alpha.14" } # pinning the version to ensure genemichaels builds.
libloading = "0.8.0"
owo-colors = "3.5.0"
proc-macro2 = { version = "1.0.48", features = ["span-locations"] }

View file

@ -1,8 +1,8 @@
# cargo-minimize
Install with `cargo install --git https://github.com/Nilstrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`.
Install with `cargo install --git https://github.com/Noratrieb/cargo-minimize cargo-minimize` and use with `cargo minimize`.
For more info, see the [cookbook](https://github.com/Nilstrieb/cargo-minimize#cookbook).
For more info, see the [cookbook](https://github.com/Noratrieb/cargo-minimize#cookbook).
## Idea

View file

@ -0,0 +1,8 @@
pub mod foo {
/// ~MINIMIZE-ROOT good
pub fn good(){}
/// ~REQUIRE-DELETED bad
pub fn bad(){}
}
/// ~MINIMIZE-ROOT main
fn main(){}

View file

@ -0,0 +1,24 @@
// this should all get deleted in a single swoop *and* not panic about it
/// ~REQUIRE-DELETED l1
mod l1 {
mod l2 {
mod l3 {
mod l4{
mod l5 {
fn foo(){}
fn bar(){}
mod l6 {
fn x1(){}
}
fn x2(){}
}
}
mod l4_2 {
fn y(){}
}
}
}
fn x8(){}
}
/// ~MINIMIZE-ROOT main
fn main(){}

12
full-tests/reexports.rs Normal file
View file

@ -0,0 +1,12 @@
use hello::{thingy, whatever};
mod hello{
pub fn thingy(){}
/// ~REQUIRE-DELETED whatever
pub fn whatever(){}
}
fn main(){
"~MINIMIZE-ROOT let x = thingy";
let x = thingy();
}

15
full-tests/reexports2.rs Normal file
View file

@ -0,0 +1,15 @@
mod A {
/// ~REQUIRE-DELETED S1
pub struct S1;
pub struct S2;
}
mod B {
use crate::A::{self, S1};
pub use A::S2 as thingy;
}
fn main() {
"~MINIMIZE-ROOT let x = B::thingy";
let x = B::thingy;
}

View file

@ -25,7 +25,8 @@ impl FromStr for RustFunction {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::compile(s).context("compiling and loading rust function")
Self::compile(s)
.map_err(|e| anyhow::format_err!("compiling and loading rust function: {:?}", e))
}
}

View file

@ -20,7 +20,7 @@ mod expand;
use anyhow::{Context, Result};
use dylib_flag::RustFunction;
use processor::Minimizer;
use processor::{Minimizer, PassSelection};
use tracing::Level;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry};
@ -83,7 +83,7 @@ pub struct Options {
/// A comma-seperated list of passes that should be enabled. By default, all passes are enabled.
#[arg(long)]
pub passes: Option<String>,
pub passes: Option<PassSelection>,
/// A path to a script that is run to check whether code reproduces. When it exits with code 0, the
/// problem reproduces. If `--script-path-lints` isn't set, this script is also run to get lints.
@ -105,6 +105,10 @@ pub struct Options {
#[arg(skip)]
pub no_delete_functions: bool,
/// Remove individual use statements manually, instead of relying on rustc lints output
#[arg(long)]
pub bisect_delete_imports: bool,
}
#[derive(Debug, Clone)]
@ -142,6 +146,7 @@ pub fn minimize(options: Options, stop: Arc<AtomicBool>) -> Result<()> {
minimizer.run_passes([
passes::EverybodyLoops::default().boxed(),
passes::SplitUse::default().boxed(),
passes::FieldDeleter::default().boxed(),
passes::Privatize::default().boxed(),
])?;
@ -186,6 +191,7 @@ impl Default for Options {
script_path_lints: None,
ignore_file: Vec::new(),
no_delete_functions: false,
bisect_delete_imports: false,
}
}
}

View file

@ -1,7 +1,8 @@
use quote::ToTokens;
use syn::{
visit_mut::VisitMut, Item, ItemConst, ItemEnum, ItemExternCrate, ItemFn, ItemMacro, ItemMacro2,
ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, Signature,
ItemMod, ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse,
Signature,
};
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
@ -73,9 +74,17 @@ impl<'a> Visitor<'a> {
self.current_path.pop();
should_retain
}
// We would hope for the unused imports pass to catch all of these
// but sadly that's not the case
Item::Use(ItemUse { tree, .. }) if self.checker.options.bisect_delete_imports => {
self.current_path.push(tree.to_token_stream().to_string());
let should_retain = self.should_retain_item();
self.current_path.pop();
should_retain
}
Item::ForeignMod(_) => true,
// We hope for the unused imports to show them all.
Item::Use(_) => true,
Item::Verbatim(_) => true,
_ => true,
}

View file

@ -2,8 +2,9 @@ mod everybody_loops;
mod field_deleter;
mod item_deleter;
mod privatize;
mod split_use;
pub use self::{
everybody_loops::EverybodyLoops, field_deleter::FieldDeleter, item_deleter::ItemDeleter,
privatize::Privatize,
privatize::Privatize, split_use::SplitUse,
};

View file

@ -24,12 +24,32 @@ impl<'a> Visitor<'a> {
impl VisitMut for Visitor<'_> {
fn visit_visibility_mut(&mut self, vis: &mut Visibility) {
if let Visibility::Public(_) = vis {
self.current_path.push("{{vis}}".to_string());
if self.checker.can_process(&self.current_path) {
self.process_state = ProcessState::Changed;
*vis = self.pub_crate.clone();
}
self.current_path.pop();
}
}
fn visit_item_mut(&mut self, item: &mut syn::Item) {
match item {
syn::Item::Use(u) => {
if let Visibility::Public(_) = u.vis {
let mut path = self.current_path.clone();
path.push(u.to_token_stream().to_string());
if self.checker.can_process(&path) {
self.process_state = ProcessState::Changed;
u.vis = self.pub_crate.clone();
}
path.pop();
}
return; // early return; do not walk the child items
}
_ => {}
}
syn::visit_mut::visit_item_mut(self, item);
}
tracking!();
}

181
src/passes/split_use.rs Normal file
View file

@ -0,0 +1,181 @@
use std::ops::DerefMut;
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
use quote::ToTokens;
use syn::{visit_mut::VisitMut, Item, ItemUse, UseName, UsePath, UseRename, UseTree};
struct Visitor<'a> {
process_state: ProcessState,
current_path: Vec<String>,
checker: &'a mut PassController,
}
impl<'a> Visitor<'a> {
fn new(checker: &'a mut PassController) -> Self {
Self {
process_state: ProcessState::NoChange,
current_path: Vec::new(),
checker,
}
}
// given a "some::group::{a, b::{c,d}, e}" tree, and assuming checker allows processing of (only) "some::group",
// returns a ["some::group::a", "some::group::b::{c,d}", "some::group::e"] list of trees.
fn expand_use_groups(&mut self, top: &syn::ItemUse, tree: &UseTree) -> Vec<UseTree> {
// It would probably be nice if instead of *expanding* the whole "some::group" group, we could instead
// *extract* individual items ("some::group::a"), but that makes code much more convoluted, sadly
match tree {
UseTree::Path(p) => {
self.current_path.push(p.ident.to_string());
let out = self
.expand_use_groups(top, &p.tree)
.into_iter()
.map(|x| {
let mut new = p.clone();
new.tree = Box::new(x);
UseTree::Path(new)
})
.collect();
self.current_path.pop();
out
}
UseTree::Group(g) => {
let new_trees = g
.items
.iter()
.map(|subtree| self.expand_use_groups(top, subtree))
.flatten()
.collect::<Vec<_>>();
self.current_path.push("{{group}}".to_string());
let can_process = self.checker.can_process(&self.current_path);
self.current_path.pop();
if can_process {
self.process_state = ProcessState::Changed;
return new_trees;
} else {
// Do not expand the group.
// recreate the UseTree::Group item (but with new subtrees), and return a single-element list
let mut g = g.clone();
g.items.clear();
g.items.extend(new_trees);
return vec![syn::UseTree::Group(g)];
}
}
_ => return vec![tree.clone()],
}
}
fn visit_item_list(&mut self, items: &mut Vec<syn::Item>) {
let mut pos = 0; // index into the `items` list
while pos < items.len() {
let item_use: ItemUse = {
match &items[pos] {
Item::Use(u) => u.clone(),
_ => {
pos += 1; // if it's not a `use`` - simply advance to the next item
continue;
}
}
};
let new_use_trees = self.expand_use_groups(&item_use, &item_use.tree);
// decorate each of the UseTree with a `use` keyword (and any attributes inherited)
let new_uses = new_use_trees.into_iter().map(|x| {
let mut new = item_use.clone();
new.tree = x;
trim_trailing_self(&mut new.tree);
syn::Item::Use(new)
});
let step = new_uses.len();
// replace the old use with the new uses
items.splice(pos..pos + 1, new_uses);
pos += step; // do not process freshly inserted items
}
}
}
// It is legal to write "use module::{self};", but not "use module::self;".
// If we do end up with the latter on our hands, convert it to "use module;" instead.
fn trim_trailing_self(use_tree: &mut UseTree) {
match use_tree {
UseTree::Path(UsePath {
tree: subtree,
ident: base_ident,
..
}) => match subtree.deref_mut() {
UseTree::Name(UseName { ident: sub_ident }) => {
if sub_ident == "self" {
*use_tree = UseTree::Name(UseName {
ident: base_ident.clone(),
});
return;
}
}
UseTree::Rename(syn::UseRename {
ident: sub_ident,
rename,
as_token,
}) => {
if sub_ident == "self" {
*use_tree = UseTree::Rename(UseRename {
ident: base_ident.clone(),
rename: rename.clone(),
as_token: as_token.clone(),
});
return;
}
}
UseTree::Path(_) => trim_trailing_self(&mut *subtree),
_ => {}
},
_ => {}
}
}
impl VisitMut for Visitor<'_> {
fn visit_item_mod_mut(&mut self, item_mod: &mut syn::ItemMod) {
self.current_path.push(item_mod.ident.to_string());
if let Some((_, items)) = &mut item_mod.content {
self.visit_item_list(items);
}
syn::visit_mut::visit_item_mod_mut(self, item_mod);
self.current_path.pop();
}
fn visit_file_mut(&mut self, file: &mut syn::File) {
self.visit_item_list(&mut file.items);
syn::visit_mut::visit_file_mut(self, file);
}
tracking!(visit_item_fn_mut);
tracking!(visit_impl_item_method_mut);
tracking!(visit_item_impl_mut);
tracking!(visit_field_mut);
tracking!(visit_item_struct_mut);
tracking!(visit_item_trait_mut);
}
#[derive(Default)]
pub struct SplitUse {}
impl Pass for SplitUse {
fn process_file(
&mut self,
krate: &mut syn::File,
_: &SourceFile,
checker: &mut PassController,
) -> ProcessState {
let mut visitor = Visitor::new(checker);
visitor.visit_file_mut(krate);
visitor.process_state
}
fn name(&self) -> &'static str {
"split-use"
}
}

View file

@ -6,6 +6,14 @@ use self::worklist::Worklist;
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
struct AstPath(Vec<String>);
impl AstPath {
fn has_prefix(&self, other: &AstPath) -> bool {
if self.0.len() < other.0.len() {
return false;
}
std::iter::zip(self.0.iter(), other.0.iter()).all(|(a, b)| a == b)
}
}
impl Borrow<[String]> for AstPath {
fn borrow(&self) -> &[String] {
@ -75,6 +83,20 @@ mod worklist {
pub(super) fn pop(&mut self) -> Option<Vec<AstPath>> {
self.0.pop()
}
// remove all the worklist items that would have been covered by the
// given ones.
// I.e. if we have already deleted the entire module, there's no need
// trying to delete that module's individual items anymore
pub(super) fn prune(&mut self, things: &std::collections::BTreeSet<AstPath>) {
for wl in &mut self.0 {
wl.retain(|path| {
// retain only if none of the things are a prefix of this path
things.iter().all(|thing| !path.has_prefix(thing))
})
}
self.0.retain(|wl| !wl.is_empty());
}
}
}
@ -97,8 +119,9 @@ impl PassController {
committed,
failed: _,
current,
worklist: _,
worklist,
} => {
worklist.prune(current);
committed.extend(mem::take(current));
self.next_in_worklist();
@ -110,19 +133,8 @@ impl PassController {
/// The changes did not reproduce the regression. Bisect further.
pub fn does_not_reproduce(&mut self) {
match &mut self.state {
PassControllerState::InitialCollection { candidates } => {
// Applying them all was too much, let's bisect!
let (current, first_worklist_item) = split_owned(mem::take(candidates));
let mut worklist = Worklist::new();
worklist.push(first_worklist_item);
self.state = PassControllerState::Bisecting {
committed: BTreeSet::new(),
failed: BTreeSet::new(),
current,
worklist,
};
PassControllerState::InitialCollection { candidates: _ } => {
unreachable!("we should have made no changes on initial collection, what do you mean it does not reproduce?!?")
}
PassControllerState::Bisecting {
committed,
@ -155,15 +167,31 @@ impl PassController {
}
}
/// The pass did not apply any changes. We're done.
/// The pass did not apply any changes. We're either done or just starting
pub fn no_change(&mut self) {
match &self.state {
match &mut self.state {
PassControllerState::InitialCollection { candidates } => {
assert!(
candidates.is_empty(),
"No change but received candidates. The responsible pass does not seem to track the ProcessState correctly: {candidates:?}"
);
self.state = PassControllerState::Success;
if candidates.is_empty() {
self.state = PassControllerState::Success;
} else {
// We could just set `current=candidates; worklist=default()`,
// but we are doing a minor optimization to split overlapping items into separate tries,
// so as to reduce the number of bisection steps that yield literally no new info.
let layers = layer_candidates(mem::take(candidates));
let mut worklist = Worklist::new();
for layer in layers.into_iter().rev() {
// .rev() so that we add shorter paths last, and process them first
worklist.push(layer);
}
let current = worklist.pop().unwrap().into_iter().collect();
self.state = PassControllerState::Bisecting {
committed: BTreeSet::new(),
failed: BTreeSet::new(),
current,
worklist,
};
}
}
PassControllerState::Bisecting { current, .. } => {
unreachable!("Pass said it didn't change anything in the bisection phase, nils forgot what this means: {current:?}");
@ -184,9 +212,9 @@ impl PassController {
pub fn can_process(&mut self, path: &[String]) -> bool {
match &mut self.state {
PassControllerState::InitialCollection { candidates } => {
// For the initial collection, we collect the candidate and apply them all.
// For the initial collection, we collect the candidate but don't apply anything
candidates.push(AstPath(path.to_owned()));
true
false
}
PassControllerState::Bisecting { current, .. } => current.contains(path),
PassControllerState::Success { .. } => {
@ -205,6 +233,7 @@ impl PassController {
match worklist.pop() {
Some(next) => {
*current = next.into_iter().collect();
trace!(?current, "current working set: ");
}
None => {
self.state = PassControllerState::Success;
@ -239,3 +268,33 @@ fn split_owned<T, From: IntoIterator<Item = T>, A: FromIterator<T>, B: FromItera
(first_half, second_half)
}
/// Split up a list of AstPath's into several sets such that none of those sets have
/// any of the AstPath's overlap.
/// I.e. so that we avoid having both ["foo"] and ["foo", "bar"] in the same set.
/// It is expected, but not guaranteed that the earlier sets would contain "less granular" items
/// (i.e. ["foo"] from the above example) and the latter sets would contain the "more granular" ones.
fn layer_candidates(mut candidates: Vec<AstPath>) -> Vec<Vec<AstPath>> {
candidates.sort(); // this *should* put less-granular/shorter-path items first
let mut layers: Vec<Vec<AstPath>> = vec![];
for candidate in candidates {
let mut appropriate_layer_no = None;
for (no, layer) in layers.iter().enumerate() {
if !layer
.iter()
.any(|known_path| candidate.has_prefix(known_path))
{
appropriate_layer_no = Some(no);
break;
}
}
match appropriate_layer_no {
Some(no) => layers[no].push(candidate),
None => {
let new_layer = vec![candidate];
layers.push(new_layer);
}
}
}
layers
}

View file

@ -19,7 +19,7 @@ pub(crate) trait Pass {
/// Process a file. The state of the processor might get invalidated in the process as signaled with
/// `ProcessState::FileInvalidated`. When a file is invalidated, the minimizer will call `Processor::refersh_state`
/// before calling the this function on the same file again.
/// before calling this function on the same file again.
fn process_file(
&mut self,
krate: &mut syn::File,
@ -50,6 +50,31 @@ pub(crate) enum ProcessState {
FileInvalidated,
}
#[derive(Debug, Clone)]
pub enum PassSelection {
Enable(Vec<String>),
Disable(Vec<String>),
}
impl std::str::FromStr for PassSelection {
type Err = &'static str;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
let values = s.split(',').collect::<Vec<_>>();
let have_negative = values.iter().any(|v| v.starts_with("no-"));
if have_negative && !values.iter().all(|v| v.starts_with("no-")) {
return Err("Pass exclusion is supported, by mixing positive pass selection with negative is not allowed (because it's pointless and confusing)");
}
let actual_values = values
.into_iter()
.map(|v| v.strip_prefix("no-").unwrap_or(v).to_string())
.collect();
if !have_negative {
Ok(PassSelection::Enable(actual_values))
} else {
Ok(PassSelection::Disable(actual_values))
}
}
}
#[derive(Debug)]
pub(crate) struct Minimizer {
files: Vec<SourceFile>,
@ -59,13 +84,12 @@ pub(crate) struct Minimizer {
}
impl Minimizer {
fn pass_disabled(&self, name: &str) -> bool {
if let Some(passes) = &self.options.passes {
if !passes.split(",").any(|allowed| name == allowed) {
return true;
}
fn pass_enabled(&self, name: &str) -> bool {
match &self.options.passes {
None => true,
Some(PassSelection::Enable(v)) => v.iter().any(|allowed| name == allowed),
Some(PassSelection::Disable(v)) => v.iter().all(|forbidden| name != forbidden),
}
false
}
pub(crate) fn new_glob_dir(
@ -131,7 +155,7 @@ impl Minimizer {
inital_build.require_reproduction("Initial")?;
for mut pass in passes {
if self.pass_disabled(pass.name()) {
if !self.pass_enabled(pass.name()) {
continue;
}
self.run_pass(&mut *pass)?;
@ -187,6 +211,7 @@ impl Minimizer {
// The logic for bisecting down lives in PassController.
let mut checker = PassController::new(self.options.clone());
let mut initial_pass = true;
loop {
let mut change = file.try_change(changes)?;
let (_, krate) = change.before_content();
@ -203,24 +228,27 @@ impl Minimizer {
if after.reproduces_issue() {
change.commit();
checker.reproduces();
if has_made_change == ProcessState::FileInvalidated {
invalidated_files.insert(file);
break;
}
} else {
change.rollback()?;
checker.does_not_reproduce();
}
if has_made_change == ProcessState::FileInvalidated {
invalidated_files.insert(file);
}
}
ProcessState::NoChange => {
if self.options.no_color {
info!("{file:?}: After {}: no changes", pass.name());
} else {
info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow());
if !initial_pass {
if self.options.no_color {
info!("{file:?}: After {}: no changes", pass.name());
} else {
info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow());
}
}
checker.no_change();
}
}
initial_pass = false;
if self.cancel.load(Ordering::SeqCst) {
info!("Exiting early.");

View file

@ -7,11 +7,7 @@ use anyhow::{Context, Result};
use proc_macro2::Span;
use quote::ToTokens;
use rustfix::{diagnostics::Diagnostic, Suggestion};
use std::{
collections::{HashMap, HashSet},
ops::Range,
path::{Path, PathBuf},
};
use std::{collections::HashMap, ops::Range, path::Path};
use syn::{visit_mut::VisitMut, ImplItem, Item};
fn file_for_suggestion(suggestion: &Suggestion) -> &Path {
@ -22,7 +18,7 @@ const PASS_NAME: &str = "delete-unused-functions";
impl Minimizer {
pub fn delete_dead_code(&mut self) -> Result<()> {
if self.pass_disabled(PASS_NAME) {
if !self.pass_enabled(PASS_NAME) {
return Ok(());
}
@ -79,9 +75,17 @@ impl Minimizer {
.copied()
.cloned()
.collect::<Vec<_>>();
if desired_suggestions.is_empty() {
continue;
}
let result =
rustfix::apply_suggestions(change.before_content().0, &desired_suggestions)?;
anyhow::ensure!(
result != change.before_content().0,
"Suggestions 'applied' but no changes made??"
);
let result = syn::parse_file(&result).context("parsing file after rustfix")?;
change.write(result)?;
@ -103,16 +107,11 @@ impl Minimizer {
struct DeleteUnusedFunctions {
diags: Vec<Diagnostic>,
build: Build,
invalid: HashSet<PathBuf>,
}
impl DeleteUnusedFunctions {
fn new(build: Build, diags: Vec<Diagnostic>) -> Self {
DeleteUnusedFunctions {
diags,
build,
invalid: HashSet::new(),
}
DeleteUnusedFunctions { diags, build }
}
}
@ -120,7 +119,6 @@ impl Pass for DeleteUnusedFunctions {
fn refresh_state(&mut self) -> Result<()> {
let (diags, _) = self.build.get_diags().context("getting diagnostics")?;
self.diags = diags;
self.invalid.clear();
Ok(())
}
@ -130,18 +128,9 @@ impl Pass for DeleteUnusedFunctions {
file: &SourceFile,
checker: &mut super::PassController,
) -> ProcessState {
assert!(
!self.invalid.contains(file.path_no_fs_interact()),
"processing with invalid state"
);
let mut visitor = FindUnusedFunction::new(file, self.diags.iter(), checker);
visitor.visit_file_mut(krate);
if visitor.process_state == ProcessState::FileInvalidated {
self.invalid.insert(file.path_no_fs_interact().to_owned());
}
visitor.process_state
}
@ -235,8 +224,12 @@ impl<'a> FindUnusedFunction<'a> {
match span_matches {
0 => true,
1 => {
self.process_state = ProcessState::FileInvalidated;
!self.checker.can_process(&self.current_path)
if self.checker.can_process(&self.current_path) {
self.process_state = ProcessState::FileInvalidated;
!self.checker.can_process(&self.current_path)
} else {
true
}
}
_ => {
panic!("multiple dead_code spans matched identifier: {span_matches}.");

View file

@ -170,6 +170,7 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<()
flag.push(regression_checker_path);
flag
});
cmd.arg("--bisect-delete-imports");
let minimize_roots = start_roots.join(",");