This commit is contained in:
nora 2024-01-01 00:34:58 +01:00 committed by GitHub
commit d67bd069fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 265 additions and 168 deletions

2
.gitignore vendored
View file

@ -5,3 +5,5 @@ target
/invalid
.direnv/
g
perf.data*
*.svg

42
Cargo.lock generated
View file

@ -113,6 +113,9 @@ dependencies = [
"tracing",
"tracing-subscriber",
"tracing-tree",
"tree-sitter",
"tree-sitter-edit",
"tree-sitter-rust",
"walkdir",
]
@ -855,6 +858,45 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "tree-sitter"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-edit"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed3213ee656e99748eca539913b5c90df3d52618d9a1714e0935013955c8031"
dependencies = [
"tree-sitter",
"tree-sitter-traversal",
]
[[package]]
name = "tree-sitter-rust"
version = "0.20.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0832309b0b2b6d33760ce5c0e818cb47e1d72b468516bfe4134408926fa7594"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-traversal"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df8a158225e4a4d8505f071340bba9edd109b23f01b70540dccb7c799868f307"
dependencies = [
"tree-sitter",
]
[[package]]
name = "unicode-id"
version = "0.3.4"

View file

@ -1,6 +1,6 @@
[workspace]
members = ["./", "./testsuite"]
exclude = ["test-cases/*", "full-tests/*"]
exclude = ["full-tests/*"]
[package]
name = "cargo-minimize"
@ -16,8 +16,12 @@ license = "MIT OR Apache-2.0"
[profile.release]
lto = "thin"
[profile.dev]
opt-level = 1
[profile.dev.package.proc-macro2]
opt-level = 3
[profile.dev.package.syn]
opt-level = 3
[profile.dev.package.genemichaels]
opt-level = 3
[dependencies]
anyhow = "1.0.65"
@ -36,4 +40,7 @@ tempfile = "3.3.0"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
tracing-tree = "0.2.2"
tree-sitter = "0.20.10"
tree-sitter-edit = "0.3.0"
tree-sitter-rust = "0.20.4"
walkdir = "2.3.2"

View file

@ -32,7 +32,7 @@ impl FromStr for RustFunction {
fn wrap_func_body(func: &str) -> Result<String> {
let closure = syn::parse_str::<syn::ExprClosure>(func).context("invalid rust syntax")?;
let syn_file = syn::parse_quote! {
let file = quote::quote! {
#[repr(C)]
pub struct __RawOutput {
out_ptr: *const u8,
@ -80,7 +80,7 @@ fn wrap_func_body(func: &str) -> Result<String> {
}
};
crate::formatting::format(syn_file)
Ok(file.to_string())
}
impl RustFunction {

View file

@ -1,12 +0,0 @@
use std::collections::HashMap;
use anyhow::Context;
use genemichaels::FormatConfig;
/// Renders a parsed `syn::File` back into source text using genemichaels with
/// its default format configuration and no extra comments.
///
/// # Errors
/// Returns an error (with the context "formatting source file") when
/// `genemichaels::format_ast` fails.
pub fn format(file: syn::File) -> anyhow::Result<String> {
    let config = FormatConfig::default();
    let formatted = genemichaels::format_ast(file, &config, HashMap::new())
        .context("formatting source file")?;
    Ok(formatted.rendered)
}

View file

@ -9,7 +9,7 @@ use std::{
mod build;
mod dylib_flag;
mod formatting;
mod tree_sitter;
mod passes;
mod processor;

View file

@ -1,7 +1,7 @@
use quote::ToTokens;
use syn::{visit_mut::VisitMut, Fields};
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile, MinimizeEdit};
struct Visitor<'a> {
current_path: Vec<String>,
@ -75,6 +75,17 @@ impl Pass for FieldDeleter {
visitor.process_state
}
/// Inspects `node` for field lists that this pass could minimize.
///
/// Currently a stub: the two field-list node kinds are recognised, but no arm
/// pushes anything, so `_edits` is left untouched for every node.
fn edits_for_node(&mut self, node: tree_sitter::Node, _edits: &mut Vec<MinimizeEdit>) {
match node.kind() {
// Braced structs
"field_declaration_list" => {}
// Tuple structs
"ordered_field_declaration_list" => {}
// Every other node kind is ignored by this pass.
_ => {}
}
}
fn name(&self) -> &'static str {
"field-deleter"
}

View file

@ -1,52 +1,18 @@
use quote::ToTokens;
use syn::{parse_quote, visit_mut::VisitMut, Visibility};
use tree_sitter_edit::NodeId;
use crate::processor::{tracking, Pass, PassController, ProcessState, SourceFile};
struct Visitor<'a> {
pub_crate: Visibility,
process_state: ProcessState,
current_path: Vec<String>,
checker: &'a mut PassController,
}
impl<'a> Visitor<'a> {
fn new(checker: &'a mut PassController) -> Self {
Self {
process_state: ProcessState::NoChange,
pub_crate: parse_quote! { pub(crate) },
current_path: Vec::new(),
checker,
}
}
}
impl VisitMut for Visitor<'_> {
fn visit_visibility_mut(&mut self, vis: &mut Visibility) {
if let Visibility::Public(_) = vis {
if self.checker.can_process(&self.current_path) {
self.process_state = ProcessState::Changed;
*vis = self.pub_crate.clone();
}
}
}
tracking!();
}
use crate::processor::{MinimizeEdit, MinimizeEditKind, Pass};
#[derive(Default)]
pub struct Privatize {}
impl Pass for Privatize {
fn process_file(
&mut self,
krate: &mut syn::File,
_: &SourceFile,
checker: &mut PassController,
) -> ProcessState {
let mut visitor = Visitor::new(checker);
visitor.visit_file_mut(krate);
visitor.process_state
/// Registers a `DeleteNode` edit for `node` when it is a visibility modifier.
///
/// Nodes of any other kind are ignored and `edits` is left unchanged.
fn edits_for_node(&mut self, node: tree_sitter::Node, edits: &mut Vec<MinimizeEdit>) {
    // Guard clause: only visibility modifiers are candidates for this pass.
    if node.kind() != "visibility_modifier" {
        return;
    }

    let node_id = NodeId::new(&node);
    edits.push(MinimizeEdit {
        node_id,
        kind: MinimizeEditKind::DeleteNode,
    });
}
fn name(&self) -> &'static str {

View file

@ -4,6 +4,8 @@ use crate::Options;
use self::worklist::Worklist;
use super::MinimizeEdit;
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
struct AstPath(Vec<String>);
@ -31,22 +33,18 @@ pub(crate) struct PassController {
/// The current state of the bisection.
#[derive(Debug)]
enum PassControllerState {
/// Initially, we have a bunch of candidates (minimization sites) that could be applied.
/// We collect them in the initial application of the pass where we try to apply all candidates.
/// If that works, great! We're done. But often it doesn't and we enter the next stage.
InitialCollection { candidates: Vec<AstPath> },
/// After applying all candidates fails, we know that we have a few bad candidates.
/// Now our job is to apply all the good candidates as efficiently as possible.
Bisecting {
/// These candidates could be applied successfully while still reproducing the issue.
/// They are now on disk and will be included in all subsequent runs.
/// This is only used for debugging, we could also just throw them away.
committed: BTreeSet<AstPath>,
committed: BTreeSet<MinimizeEdit>,
/// These candidates failed in isolation and are therefore bad.
/// This is only used for debugging, we could also just throw them away.
failed: BTreeSet<AstPath>,
failed: BTreeSet<MinimizeEdit>,
/// The set of candidates that we want to apply in this iteration.
current: BTreeSet<AstPath>,
current: Vec<MinimizeEdit>,
/// The list of `current`s that we want to try in the future.
worklist: Worklist,
},
@ -55,34 +53,37 @@ enum PassControllerState {
}
mod worklist {
use super::AstPath;
use crate::processor::MinimizeEdit;
/// A worklist that ensures that the inner list is never empty.
#[derive(Debug)]
pub(super) struct Worklist(Vec<Vec<AstPath>>);
pub(super) struct Worklist(Vec<Vec<MinimizeEdit>>);
impl Worklist {
pub(super) fn new() -> Self {
Self(Vec::new())
}
pub(super) fn push(&mut self, next: Vec<AstPath>) {
pub(super) fn push(&mut self, next: Vec<MinimizeEdit>) {
if !next.is_empty() {
self.0.push(next);
}
}
pub(super) fn pop(&mut self) -> Option<Vec<AstPath>> {
pub(super) fn pop(&mut self) -> Option<Vec<MinimizeEdit>> {
self.0.pop()
}
}
}
impl PassController {
pub fn new(options: Options) -> Self {
pub fn new(options: Options, edits: Vec<MinimizeEdit>) -> Self {
Self {
state: PassControllerState::InitialCollection {
candidates: Vec::new(),
state: PassControllerState::Bisecting {
committed: BTreeSet::new(),
failed: BTreeSet::new(),
current: edits,
worklist: Worklist::new(),
},
options,
}
@ -90,9 +91,6 @@ impl PassController {
pub fn reproduces(&mut self) {
match &mut self.state {
PassControllerState::InitialCollection { .. } => {
self.state = PassControllerState::Success;
}
PassControllerState::Bisecting {
committed,
failed: _,
@ -110,20 +108,6 @@ impl PassController {
/// The changes did not reproduce the regression. Bisect further.
pub fn does_not_reproduce(&mut self) {
match &mut self.state {
PassControllerState::InitialCollection { candidates } => {
// Applying them all was too much, let's bisect!
let (current, first_worklist_item) = split_owned(mem::take(candidates));
let mut worklist = Worklist::new();
worklist.push(first_worklist_item);
self.state = PassControllerState::Bisecting {
committed: BTreeSet::new(),
failed: BTreeSet::new(),
current,
worklist,
};
}
PassControllerState::Bisecting {
committed,
failed,
@ -158,15 +142,9 @@ impl PassController {
/// The pass did not apply any changes. We're done.
pub fn no_change(&mut self) {
match &self.state {
PassControllerState::InitialCollection { candidates } => {
assert!(
candidates.is_empty(),
"No change but received candidates. The responsible pass does not seem to track the ProcessState correctly: {candidates:?}"
);
self.state = PassControllerState::Success;
}
PassControllerState::Bisecting { current, .. } => {
unreachable!("Pass said it didn't change anything in the bisection phase, nils forgot what this means: {current:?}");
assert!(current.is_empty(), "there are edits available and yet nothing changed, that's nonsense, there's a bug somewhere (i dont know where)");
self.state = PassControllerState::Success;
}
PassControllerState::Success { .. } => {}
}
@ -174,21 +152,19 @@ impl PassController {
pub fn is_finished(&mut self) -> bool {
match &mut self.state {
PassControllerState::InitialCollection { .. } => false,
PassControllerState::Bisecting { .. } => false,
PassControllerState::Success { .. } => true,
}
}
/// Checks whether a pass may apply the changes for a minimization site.
pub fn can_process(&mut self, path: &[String]) -> bool {
match &mut self.state {
PassControllerState::InitialCollection { candidates } => {
// For the initial collection, we collect the candidate and apply them all.
candidates.push(AstPath(path.to_owned()));
true
pub fn can_process(&mut self, _: &[String]) -> bool {
false
}
PassControllerState::Bisecting { current, .. } => current.contains(path),
/// Returns the edits that should be applied in the current iteration.
pub fn current_work_items(&mut self) -> &[MinimizeEdit] {
match &mut self.state {
PassControllerState::Bisecting { current, .. } => current,
PassControllerState::Success { .. } => {
unreachable!("Processed further after success");
}

View file

@ -3,6 +3,8 @@ use std::{fs, path::Path};
pub(crate) use self::file::SourceFile;
use super::MinimizeEdit;
mod file {
use anyhow::{Context, Result};
use std::{
@ -10,6 +12,8 @@ mod file {
path::{Path, PathBuf},
};
use crate::processor::MinimizeEdit;
use super::{Changes, FileChange};
/// The representation of a source file, with the cached AST.
@ -19,34 +23,59 @@ mod file {
pub(crate) struct SourceFile {
path: PathBuf,
content_str: RefCell<String>,
content: RefCell<syn::File>,
content: RefCell<tree_sitter::Tree>,
}
impl SourceFile {
pub(crate) fn open(path: PathBuf) -> Result<Self> {
let string = std::fs::read_to_string(&path)
.with_context(|| format!("reading file {}", path.display()))?;
let content = syn::parse_file(&string)
.with_context(|| format!("parsing file {}", path.display()))?;
let content_ts = crate::tree_sitter::parse(&string)
.with_context(|| format!("parsing file {path:?}"))?;
Ok(SourceFile {
path,
content_str: RefCell::new(string),
content: RefCell::new(content),
content: RefCell::new(content_ts),
})
}
pub(crate) fn write(&self, new: syn::File) -> Result<()> {
let string = crate::formatting::format(new.clone())?;
pub(crate) fn write(&self, new: tree_sitter::Tree, edits: &[MinimizeEdit]) -> Result<()> {
let string = crate::tree_sitter::apply_edits(new, &*self.content_str.borrow(), edits)?;
std::fs::write(&self.path, &string)
.with_context(|| format!("writing file {}", self.path.display()))?;
let reparsed =
crate::tree_sitter::parse(&string).expect("failed to reparse after edit");
*self.content_str.borrow_mut() = string;
*self.content.borrow_mut() = new;
*self.content.borrow_mut() = reparsed;
Ok(())
}
pub(crate) fn path_no_fs_interact(&self) -> &Path {
&self.path
}
/// Borrows the cached tree-sitter syntax tree of this file.
///
/// The returned guard is a shared borrow of the `content` `RefCell`; it must be
/// dropped before the tree is replaced (e.g. by `write`), otherwise the
/// mutable borrow there will panic.
pub(crate) fn borrow_tree(&self) -> std::cell::Ref<'_, tree_sitter::Tree> {
self.content.borrow()
}
/// Begins a change on this file, recording it against `changes`.
///
/// The current source text and syntax tree are cloned into the returned
/// `FileChange` so that the previous state remains available to it.
pub(crate) fn try_change<'file, 'change>(
    &'file self,
    changes: &'change mut Changes,
) -> Result<FileChange<'file, 'change>> {
    // Snapshot the text first and the tree second, as the struct literal did before.
    let before_content_str = self.content_str.borrow().clone();
    let before_content = self.content.borrow().clone();

    Ok(FileChange {
        path: &self.path,
        source_file: self,
        changes,
        has_written_change: false,
        before_content_str,
        before_content,
    })
}
}
impl PartialEq for SourceFile {
@ -68,23 +97,6 @@ mod file {
write!(f, "{}", self.path.display())
}
}
impl SourceFile {
pub(crate) fn try_change<'file, 'change>(
&'file self,
changes: &'change mut Changes,
) -> Result<FileChange<'file, 'change>> {
let path = &self.path;
Ok(FileChange {
path,
source_file: self,
changes,
has_written_change: false,
before_content_str: self.content_str.borrow().clone(),
before_content: self.content.borrow().clone(),
})
}
}
}
#[derive(Default)]
@ -96,26 +108,26 @@ pub(crate) struct FileChange<'a, 'b> {
pub(crate) path: &'a Path,
source_file: &'a SourceFile,
before_content_str: String,
before_content: syn::File,
before_content: tree_sitter::Tree,
changes: &'b mut Changes,
has_written_change: bool,
}
impl FileChange<'_, '_> {
pub(crate) fn before_content(&self) -> (&str, &syn::File) {
pub(crate) fn before_content(&self) -> (&str, &tree_sitter::Tree) {
(&self.before_content_str, &self.before_content)
}
pub(crate) fn write(&mut self, new: syn::File) -> Result<()> {
pub(crate) fn write(&mut self, new: tree_sitter::Tree, edits: &[MinimizeEdit]) -> Result<()> {
self.has_written_change = true;
self.source_file.write(new)?;
self.source_file.write(new, edits)?;
Ok(())
}
pub(crate) fn rollback(mut self) -> Result<()> {
assert!(self.has_written_change);
self.has_written_change = false;
self.source_file.write(self.before_content.clone())?;
self.source_file.write(self.before_content.clone(), &[])?;
Ok(())
}

View file

@ -5,10 +5,11 @@ mod reaper;
pub(crate) use self::files::SourceFile;
use crate::{build::Build, processor::files::Changes, Options};
use anyhow::{bail, Context, Result};
use owo_colors::OwoColorize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::{collections::HashSet, ffi::OsStr, fmt::Debug, sync::atomic::AtomicBool};
use tree_sitter::Node;
use tree_sitter_edit::NodeId;
pub(crate) use self::checker::PassController;
@ -22,10 +23,14 @@ pub(crate) trait Pass {
/// before calling this function on the same file again.
fn process_file(
&mut self,
krate: &mut syn::File,
file: &SourceFile,
checker: &mut PassController,
) -> ProcessState;
_krate: &mut syn::File,
_file: &SourceFile,
_checker: &mut PassController,
) -> ProcessState {
unimplemented!()
}
fn edits_for_node(&mut self, _node: tree_sitter::Node, _edits: &mut Vec<MinimizeEdit>) {}
fn name(&self) -> &'static str;
@ -50,6 +55,17 @@ pub(crate) enum ProcessState {
FileInvalidated,
}
/// A single candidate edit: an operation (`kind`) targeting one syntax-tree node.
///
/// The ordering traits are derived so that edits can be stored in ordered sets.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct MinimizeEdit {
/// Identifies the tree-sitter node this edit applies to.
pub(crate) node_id: NodeId,
/// What to do with that node.
pub(crate) kind: MinimizeEditKind,
}
/// The operation a `MinimizeEdit` performs on its target node.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum MinimizeEditKind {
/// Remove the node: when the tree is rendered, the node's text is replaced
/// with nothing.
DeleteNode,
}
#[derive(Debug)]
pub(crate) struct Minimizer {
files: Vec<SourceFile>,
@ -173,12 +189,12 @@ impl Minimizer {
}
}
#[instrument(skip(self, pass, invalidated_files, changes), fields(pass = %pass.name()), level = "debug")]
#[instrument(skip(self, pass, _invalidated_files, changes), fields(pass = %pass.name()), level = "debug")]
fn process_file<'file>(
&self,
pass: &mut dyn Pass,
file: &'file SourceFile,
invalidated_files: &mut HashSet<&'file SourceFile>,
_invalidated_files: &mut HashSet<&'file SourceFile>,
changes: &mut Changes,
) -> Result<()> {
// The core logic of minimization.
@ -186,16 +202,34 @@ impl Minimizer {
// For this, we repeatedly try to apply a pass to a subset of a file until we've exhausted all options.
// The logic for bisecting down lives in PassController.
let mut checker = PassController::new(self.options.clone());
let mut edits = Vec::new();
let krate = file.borrow_tree();
recursive_walk_node(krate.root_node(), &mut |node| {
pass.edits_for_node(node, &mut edits);
});
drop(krate);
let mut checker = PassController::new(self.options.clone(), edits);
loop {
let mut change = file.try_change(changes)?;
let (_, krate) = change.before_content();
let mut krate = krate.clone();
let has_made_change = pass.process_file(&mut krate, file, &mut checker);
let krate = krate.clone();
match has_made_change {
ProcessState::Changed | ProcessState::FileInvalidated => {
change.write(krate)?;
let edits = checker.current_work_items();
match edits.len() {
0 => {
use owo_colors::OwoColorize;
if self.options.no_color {
info!("{file:?}: After {}: no changes", pass.name());
} else {
info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow());
}
checker.no_change();
}
1.. => {
change.write(krate, edits)?;
let after = self.build.build()?;
info!("{file:?}: After {}: {after}", pass.name());
@ -207,18 +241,6 @@ impl Minimizer {
change.rollback()?;
checker.does_not_reproduce();
}
if has_made_change == ProcessState::FileInvalidated {
invalidated_files.insert(file);
}
}
ProcessState::NoChange => {
if self.options.no_color {
info!("{file:?}: After {}: no changes", pass.name());
} else {
info!("{file:?}: After {}: {}", pass.name(), "no changes".yellow());
}
checker.no_change();
}
}
@ -235,6 +257,14 @@ impl Minimizer {
}
}
/// Visits every descendant of `node` in pre-order, calling `for_each` on each one.
///
/// The starting `node` itself is not passed to `for_each`; only its children,
/// their children, and so on. Each child is reported before its own subtree
/// is descended into.
fn recursive_walk_node<'a>(node: Node<'a>, for_each: &mut impl FnMut(Node<'_>)) {
    // Iterate with a cursor instead of `node.child(i)`: indexed child lookup is
    // not constant time, and the cursor-based iterator yields the children
    // directly, so there is no `Option` to unwrap.
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        for_each(child);
        recursive_walk_node(child, for_each);
    }
}
macro_rules! tracking {
() => {
tracking!(visit_item_fn_mut);

View file

@ -82,8 +82,8 @@ impl Minimizer {
let result =
rustfix::apply_suggestions(change.before_content().0, &desired_suggestions)?;
let result = syn::parse_file(&result).context("parsing file after rustfix")?;
change.write(result)?;
let result = crate::tree_sitter::parse(&result).context("parsing file after rustfix")?;
change.write(result, &[])?;
let after = self.build.build()?;

53
src/tree_sitter.rs Normal file
View file

@ -0,0 +1,53 @@
use anyhow::{Context, Result};
use crate::processor::{MinimizeEdit, MinimizeEditKind};
/// Parses Rust `source` text into a tree-sitter syntax tree.
///
/// A fresh parser is created and configured with the Rust grammar on every call.
///
/// # Errors
/// Fails when the Rust grammar cannot be loaded into the parser, or when the
/// parser does not produce a tree.
pub fn parse(source: &str) -> Result<tree_sitter::Tree> {
    let mut rust_parser = tree_sitter::Parser::new();
    let grammar = tree_sitter_rust::language();
    rust_parser
        .set_language(grammar)
        .context("loading tree sitter rust grammar")?;

    // No previous tree is supplied, so this is always a full parse.
    rust_parser.parse(source, None).context("parsing file")
}
pub fn apply_edits(
file: tree_sitter::Tree, // Taking it by value as the old tree should not be used afterwards
source: &str,
edits: &[MinimizeEdit],
) -> anyhow::Result<String> {
let mut s = Vec::new();
tree_sitter_edit::render(&mut s, &file, source.as_bytes(), &MinimizeEditor { edits })
.context("printing tree")?;
Ok(String::from_utf8(s).unwrap())
}
/// Adapter that exposes a slice of `MinimizeEdit`s through the
/// `tree_sitter_edit::Editor` trait so they can be applied while rendering.
struct MinimizeEditor<'a> {
/// The edits to apply; a node is edited when one of these targets it.
edits: &'a [MinimizeEdit],
}
impl tree_sitter_edit::Editor for MinimizeEditor<'_> {
    /// Returns `true` when at least one registered edit targets `node`.
    fn has_edit(&self, _tree: &tree_sitter::Tree, node: &tree_sitter::Node<'_>) -> bool {
        self.edits.iter().any(|edit| edit.node_id.is(node))
    }

    /// Produces the replacement bytes for `node`, using the first registered
    /// edit that targets it.
    ///
    /// # Panics
    /// Panics when no registered edit targets `node`. This is expected to be
    /// called only for nodes for which `has_edit` returned `true`.
    fn edit(
        &self,
        _source: &[u8],
        _tree: &tree_sitter::Tree,
        node: &tree_sitter::Node<'_>,
    ) -> Vec<u8> {
        let edit = self
            .edits
            .iter()
            .find(|edit| edit.node_id.is(node))
            .expect("`edit` was called for a node that has no registered edit");

        match edit.kind {
            // Deleting a node means rendering nothing in its place.
            MinimizeEditKind::DeleteNode => Vec::new(),
        }
    }
}

View file

@ -101,7 +101,7 @@ pub fn full_tests() -> Result<()> {
let path = child.path();
build(&cargo, &path, &regression_checker_path)
.with_context(|| format!("building {:?}", path.file_name().unwrap()))
.with_context(|| format!("test {:?}", path.file_name().unwrap()))
})
.collect::<Result<Vec<_>>>()?;
} else {
@ -109,7 +109,7 @@ pub fn full_tests() -> Result<()> {
let path = child.path();
build(&cargo, &path, &regression_checker_path)
.with_context(|| format!("building {:?}", path.file_name().unwrap()))?;
.with_context(|| format!("test {:?}", path.file_name().unwrap()))?;
}
}
@ -159,6 +159,10 @@ fn build(cargo: &Path, path: &Path, regression_checker_path: &Path) -> Result<()
.canonicalize()
.context("canonicalizing target/debug/cargo-minimize")?;
if is_ignored(&proj_dir).context("checking whether the test is ignored")? {
return Ok(());
}
let start_roots = get_roots(&proj_dir).context("getting initial MINIMIZE-ROOTs")?;
let mut cmd = Command::new(cargo_minimize);
@ -209,6 +213,12 @@ fn get_required_deleted(path: &Path) -> Result<Vec<String>> {
grep(path, &REGEX)
}
/// Reports whether the test project at `path` carries a `~IGNORE` marker.
///
/// Returns `Ok(true)` when `grep` yields at least one match for `~IGNORE`,
/// and passes on any error from `grep`.
fn is_ignored(path: &Path) -> Result<bool> {
    static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"~IGNORE").unwrap());

    let matches = grep(path, &REGEX)?;
    Ok(!matches.is_empty())
}
fn grep(path: &Path, regex: &Regex) -> Result<Vec<String>> {
let path = path.join("src");
let mut results = Vec::new();