diff --git a/Cargo.lock b/Cargo.lock index 5a986db..beea351 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,6 +152,7 @@ dependencies = [ "anyhow", "cargo", "clap 4.0.29", + "libc", "prettyplease", "proc-macro2", "quote", @@ -159,6 +160,7 @@ dependencies = [ "serde", "serde_json", "syn", + "tempfile", "tracing", "tracing-subscriber", "tracing-tree", @@ -736,9 +738,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.134" +version = "0.2.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" +checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" [[package]] name = "libgit2-sys" diff --git a/Cargo.toml b/Cargo.toml index 2b468c7..c8aaa20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,11 @@ rustfix = "0.6.1" serde = { version = "1.0.151", features = ["derive"] } serde_json = "1.0.90" syn = { version = "1.0.101", features = ["full", "visit", "visit-mut"] } +tempfile = "3.3.0" tracing = "0.1.37" tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } tracing-tree = "0.2.2" walkdir = "2.3.2" + +[target."cfg(unix)".dependencies] +libc = "0.2.138" diff --git a/src/build.rs b/src/build.rs index a5d2d5f..6c40706 100644 --- a/src/build.rs +++ b/src/build.rs @@ -1,20 +1,42 @@ use anyhow::{bail, Context, Result}; use rustfix::diagnostics::Diagnostic; use serde::Deserialize; -use std::{collections::HashSet, fmt::Display, path::PathBuf, process::Command, rc::Rc}; +use std::{ + collections::HashSet, + fmt::{Debug, Display}, + path::PathBuf, + process::Command, + rc::Rc, +}; -use crate::{EnvVar, Options}; +use crate::{dylib_flag::RustFunction, EnvVar, Options}; #[derive(Debug, Clone)] pub struct Build { inner: Rc, } +pub enum Verify { + Ice, + Custom(RustFunction), + None, +} + +impl Debug for Verify { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
match self { + Self::Ice => write!(f, "Ice"), + Self::Custom(_) => f.debug_tuple("Custom").finish(), + Self::None => write!(f, "None"), + } + } +} + #[derive(Debug)] struct BuildInner { mode: BuildMode, input_path: PathBuf, - no_verify: bool, + verify: Verify, env: Vec, } @@ -40,18 +62,26 @@ impl Build { } }; + let verify = if options.no_verify { + Verify::None + } else if let Some(func) = options.verify_fn { + Verify::Custom(func) + } else { + Verify::Ice + }; + Self { inner: Rc::new(BuildInner { mode, input_path: options.path.clone(), - no_verify: options.no_verify, + verify, env: options.env.clone(), }), } } pub fn build(&self) -> Result { - if self.inner.no_verify { + if let Verify::None = self.inner.verify { return Ok(BuildResult { reproduces_issue: false, no_verify: true, @@ -59,7 +89,7 @@ impl Build { }); } - let (reproduces_issue, output) = match &self.inner.mode { + let (is_ice, output) = match &self.inner.mode { BuildMode::Cargo { args } => { let mut cmd = Command::new("cargo"); cmd.arg("build"); @@ -116,9 +146,15 @@ impl Build { } }; + let reproduces_issue = match self.inner.verify { + Verify::None => unreachable!("handled ealier"), + Verify::Ice => is_ice, + Verify::Custom(func) => func.call(&output), + }; + Ok(BuildResult { reproduces_issue, - no_verify: self.inner.no_verify, + no_verify: false, output, }) } @@ -144,8 +180,6 @@ impl Build { .into_iter::() .collect::, _>>()?; - - messages .into_iter() .filter(|msg| msg.reason == "compiler-message") diff --git a/src/dylib_flag.rs b/src/dylib_flag.rs new file mode 100644 index 0000000..25c1786 --- /dev/null +++ b/src/dylib_flag.rs @@ -0,0 +1,116 @@ +//! Handles the --verify-fn flag. +//! It takes in a Rust closure like `|str| true` that takes in a `&str` and returns a bool. 
+ +use std::{fmt::Debug, str::FromStr}; + +use anyhow::{Context, Result}; +use quote::quote; + +type Entrypoint = unsafe extern "C" fn(*const u8, usize) -> bool; + +#[derive(Clone, Copy)] +pub struct RustFunction { + func: Entrypoint, +} + +impl FromStr for RustFunction { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Self::compile(s) + } +} + +fn wrap_func_body(func: &str) -> Result { + let closure = syn::parse_str::(func).context("invalid rust syntax")?; + + let tokenstream = quote! { + #[no_mangle] + pub extern "C" fn cargo_minimize_ffi_function(ptr: *const u8, len: usize) -> bool { + match ::std::panic::catch_unwind(|| __cargo_minimize_inner(ptr, len)) { + Ok(bool) => bool, + Err(_) => ::std::process::abort(), + } + } + + fn __cargo_minimize_inner(__ptr: *const u8, __len: usize) -> bool { + let __slice = unsafe { ::std::slice::from_raw_parts(__ptr, __len) }; + let __str = ::std::str::from_utf8(__slice).unwrap(); + + (#closure)(__str) + } + }; + + Ok(tokenstream.to_string()) +} + +impl RustFunction { + #[cfg(not(unix))] + pub fn compile(body: &str) -> Result { + Err(anyhow::anyhow!("--verify-fn only works on unix")); + } + + #[cfg(unix)] + pub fn compile(body: &str) -> Result { + use anyhow::bail; + use std::io; + use std::process::Command; + use std::{ffi::CString, os::unix::prelude::OsStringExt}; + + let file = tempfile::tempdir()?; + + let full_file = wrap_func_body(body)?; + + let source_path = file.path().join("source.rs"); + + std::fs::write(&source_path, &full_file).context("writing source")?; + + let mut rustc = Command::new("rustc"); + rustc.arg(source_path); + rustc.args(["--crate-type=cdylib", "--crate-name=helper", "--emit=link"]); + rustc.current_dir(file.path()); + + let output = rustc.output().context("running rustc")?; + if !output.status.success() { + let stderr = String::from_utf8(output.stderr)?; + bail!("Failed to compile code: {stderr}"); + } + + let dylib_path = file.path().join("libhelper.so"); + + let os_str = 
dylib_path.into_os_string(); + let vec = os_str.into_vec(); + let cstr = CString::new(vec)?; + + let dylib = unsafe { libc::dlopen(cstr.as_ptr(), libc::RTLD_LAZY) }; + + if dylib.is_null() { + bail!("failed to open dylib: {}", io::Error::last_os_error()); + } + + let symbol = b"cargo_minimize_ffi_function\0"; + + let func = unsafe { libc::dlsym(dylib, symbol.as_ptr().cast()) }; + + if func.is_null() { + bail!("didn't find entrypoint symbol"); + } + + let func = unsafe { std::mem::transmute::<*mut _, Entrypoint>(func) }; + + Ok(Self { func }) + } + + pub fn call(&self, output: &str) -> bool { + let ptr = output.as_ptr(); + let len = output.len(); + + unsafe { (self.func)(ptr, len) } + } +} + +impl Debug for RustFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RustFunction").finish_non_exhaustive() + } +} diff --git a/src/lib.rs b/src/lib.rs index d3a599a..57f1218 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ extern crate tracing; use std::{path::PathBuf, str::FromStr}; mod build; +mod dylib_flag; mod everybody_loops; mod expand; mod privatize; @@ -11,9 +12,10 @@ mod processor; use anyhow::{Context, Result}; use clap::Parser; +use dylib_flag::RustFunction; use processor::Minimizer; -use crate::{processor::Processor}; +use crate::processor::Processor; #[derive(clap::Parser)] #[command(version, about, name = "cargo", bin_name = "cargo")] @@ -33,6 +35,8 @@ pub struct Options { rustc: bool, #[arg(long)] no_verify: bool, + #[arg(long)] + verify_fn: Option, #[arg(long)] env: Vec, diff --git a/src/processor/files.rs b/src/processor/files.rs index e9ad284..3e1fae2 100644 --- a/src/processor/files.rs +++ b/src/processor/files.rs @@ -1,50 +1,40 @@ use anyhow::{Context, Result}; -use std::{ - fs, - path::{Path, PathBuf}, -}; - +use std::{fs, path::{Path, PathBuf}}; #[derive(Debug, PartialEq, Eq, Clone, Hash)] -pub struct SourceFile { - pub path: PathBuf, +pub(crate) struct SourceFile { + pub(crate) path: PathBuf, } - 
#[derive(Default)] -pub struct Changes { +pub(crate) struct Changes { any_change: bool, } - -pub struct FileChange<'a, 'b> { - pub path: &'a Path, +pub(crate) struct FileChange<'a, 'b> { + pub(crate) path: &'a Path, content: String, changes: &'b mut Changes, has_written_change: bool, } - impl FileChange<'_, '_> { - pub fn before_content(&self) -> &str { + pub(crate) fn before_content(&self) -> &str { &self.content } - - pub fn write(&mut self, new: &str) -> Result<()> { + pub(crate) fn write(&mut self, new: &str) -> Result<()> { self.has_written_change = true; - fs::write(self.path, new).with_context(|| format!("writing file {}", self.path.display())) + fs::write(self.path, new) + .with_context(|| format!("writing file {}", self.path.display())) } - - pub fn rollback(mut self) -> Result<()> { + pub(crate) fn rollback(mut self) -> Result<()> { assert!(self.has_written_change); self.has_written_change = false; fs::write(self.path, &self.content) .with_context(|| format!("writing file {}", self.path.display())) } - - pub fn commit(mut self) { + pub(crate) fn commit(mut self) { assert!(self.has_written_change); self.has_written_change = false; self.changes.any_change = true; } } - impl Drop for FileChange<'_, '_> { fn drop(&mut self) { if self.has_written_change { @@ -55,9 +45,8 @@ impl Drop for FileChange<'_, '_> { } } } - impl SourceFile { - pub fn try_change<'file, 'change>( + pub(crate) fn try_change<'file, 'change>( &'file self, changes: &'change mut Changes, ) -> Result<FileChange<'file, 'change>> { @@ -71,9 +60,8 @@ impl SourceFile { }) } } - impl Changes { - pub fn had_changes(&self) -> bool { + pub(crate) fn had_changes(&self) -> bool { self.any_change } } diff --git a/src/processor/mod.rs b/src/processor/mod.rs index 65fa6b1..b611bc0 100644 --- a/src/processor/mod.rs +++ b/src/processor/mod.rs @@ -1,19 +1,13 @@ mod files; mod reaper; - use std::{borrow::Borrow, collections::HashSet, ffi::OsStr, fmt::Debug, mem, path::Path}; - use anyhow::{Context, Result}; - use crate::{build::Build, 
processor::files::Changes}; - -pub use self::files::SourceFile; - -pub trait Processor { +pub(crate) use self::files::SourceFile; +pub(crate) trait Processor { fn refresh_state(&mut self) -> Result<()> { Ok(()) } - /// Process a file. The state of the processor might get invalidated in the process as signaled with /// `ProcessState::FileInvalidated`. When a file is invalidated, the minimizer will call `Processor::refersh_state` /// before calling the this function on the same file again. @@ -23,33 +17,27 @@ pub trait Processor { file: &SourceFile, checker: &mut PassController, ) -> ProcessState; - fn name(&self) -> &'static str; } - impl Debug for dyn Processor { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(self.name()) } } - #[derive(Debug, PartialEq, Eq)] -pub enum ProcessState { +pub(crate) enum ProcessState { NoChange, Changed, FileInvalidated, } - #[derive(Debug)] -pub struct Minimizer { +pub(crate) struct Minimizer { files: Vec, build: Build, } - impl Minimizer { - pub fn new_glob_dir(path: &Path, build: Build) -> Self { + pub(crate) fn new_glob_dir(path: &Path, build: Build) -> Self { let walk = walkdir::WalkDir::new(path); - let files = walk .into_iter() .filter_map(|entry| match entry { @@ -67,54 +55,41 @@ impl Minimizer { println!("- {}", file.path.display()); }) .collect(); - Self { files, build } } - - pub fn run_passes<'a>( + pub(crate) fn run_passes<'a>( &self, passes: impl IntoIterator>, ) -> Result<()> { let inital_build = self.build.build()?; println!("Initial build: {inital_build}"); inital_build.require_reproduction("Initial")?; - for mut pass in passes { self.run_pass(&mut *pass)?; } - Ok(()) } - fn run_pass(&self, pass: &mut dyn Processor) -> Result<()> { let mut invalidated_files = HashSet::new(); - let mut refresh_and_try_again = false; - loop { let span = info_span!("Starting round of pass", name = pass.name()); let _enter = span.enter(); - let mut changes = Changes::default(); - for file in &self.files { 
if invalidated_files.contains(file) { continue; } - self.process_file(pass, file, &mut invalidated_files, &mut changes)?; } - if !changes.had_changes() { if !refresh_and_try_again && !invalidated_files.is_empty() { - // A few files have been invalidated, let's refresh and try these again. pass.refresh_state().context("refreshing state for pass")?; invalidated_files.clear(); refresh_and_try_again = true; println!("Refreshing files for {}", pass.name()); continue; } - println!("Finished {}", pass.name()); return Ok(()); } else { @@ -122,7 +97,6 @@ impl Minimizer { } } } - fn process_file<'file>( &self, pass: &mut dyn Processor, @@ -131,29 +105,19 @@ impl Minimizer { changes: &mut Changes, ) -> Result<()> { let mut checker = PassController::new(); - loop { - dbg!(&checker); - + dbg!(& checker); let file_display = file.path.display(); - let mut change = file.try_change(changes)?; - let mut krate = syn::parse_file(change.before_content()) .with_context(|| format!("parsing file {file_display}"))?; - let has_made_change = pass.process_file(&mut krate, file, &mut checker); - match has_made_change { ProcessState::Changed | ProcessState::FileInvalidated => { let result = prettyplease::unparse(&krate); - change.write(&result)?; - let after = self.build.build()?; - println!("{file_display}: After {}: {after}", pass.name()); - if after.reproduces_issue() { change.commit(); checker.reproduces(); @@ -161,7 +125,6 @@ impl Minimizer { change.rollback()?; checker.does_not_reproduce(); } - if has_made_change == ProcessState::FileInvalidated { invalidated_files.insert(file); } @@ -171,50 +134,35 @@ impl Minimizer { checker.no_change(); } } - if checker.is_finished() { break; } } - Ok(()) } } - #[derive(Clone, PartialEq, Eq, Hash)] struct AstPath(Vec<String>); - impl Borrow<[String]> for AstPath { fn borrow(&self) -> &[String] { &self.0 } } - impl Debug for AstPath { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "AstPath({:?})", self.0) } } - #[derive(Debug)] 
-pub struct PassController { +pub(crate) struct PassController { state: PassControllerState, } - #[derive(Debug)] enum PassControllerState { - InitialCollection { - candidates: Vec<AstPath>, - }, - - Bisecting { - current: HashSet<AstPath>, - worklist: Vec<Vec<AstPath>>, - }, - + InitialCollection { candidates: Vec<AstPath> }, + Bisecting { current: HashSet<AstPath>, worklist: Vec<Vec<AstPath>> }, Success, } - impl PassController { fn new() -> Self { Self { @@ -223,44 +171,41 @@ impl PassController { }, } } - fn reproduces(&mut self) { match &mut self.state { PassControllerState::InitialCollection { .. } => { - self.state = PassControllerState::Success + self.state = PassControllerState::Success; } - PassControllerState::Bisecting { - current, worklist, .. - } => match worklist.pop() { - Some(next) => *current = next.into_iter().collect(), - None => { - self.state = PassControllerState::Success; + PassControllerState::Bisecting { current, worklist, .. } => { + match worklist.pop() { + Some(next) => *current = next.into_iter().collect(), + None => { + self.state = PassControllerState::Success; + } } - }, + } PassControllerState::Success => unreachable!("Processed after success"), } } - fn does_not_reproduce(&mut self) { match &mut self.state { PassControllerState::InitialCollection { candidates } => { let candidates = mem::take(candidates); let half = candidates.len() / 2; let (first_half, second_half) = candidates.split_at(half); - - self.state = PassControllerState::Bisecting { + self + .state = PassControllerState::Bisecting { current: first_half.iter().cloned().collect(), worklist: vec![second_half.to_owned()], }; } PassControllerState::Bisecting { current, worklist } => { - dbg!(&current, &worklist); + dbg!(& current, & worklist); todo!(); } PassControllerState::Success => unreachable!("Processed after success"), } } - fn no_change(&mut self) { match &self.state { PassControllerState::InitialCollection { candidates } => { @@ -271,12 +216,13 @@ impl PassController { self.state = PassControllerState::Success; } 
PassControllerState::Bisecting { current, .. } => { - unreachable!("No change while bisecting, current was empty somehow: {current:?}"); + unreachable!( + "No change while bisecting, current was empty somehow: {current:?}" + ); } PassControllerState::Success => {} } } - fn is_finished(&mut self) -> bool { match &mut self.state { PassControllerState::InitialCollection { .. } => false, @@ -284,8 +230,7 @@ impl PassController { PassControllerState::Success => true, } } - - pub fn can_process(&mut self, path: &[String]) -> bool { + pub(crate) fn can_process(&mut self, path: &[String]) -> bool { match &mut self.state { PassControllerState::InitialCollection { candidates } => { candidates.push(AstPath(path.to_owned())); @@ -298,43 +243,31 @@ impl PassController { } } } - macro_rules! tracking { () => { - tracking!(visit_item_fn_mut); - tracking!(visit_impl_item_method_mut); - tracking!(visit_item_impl_mut); - tracking!(visit_item_mod_mut); + tracking!(visit_item_fn_mut); tracking!(visit_impl_item_method_mut); + tracking!(visit_item_impl_mut); tracking!(visit_item_mod_mut); }; (visit_item_fn_mut) => { - fn visit_item_fn_mut(&mut self, func: &mut syn::ItemFn) { - self.current_path.push(func.sig.ident.to_string()); - syn::visit_mut::visit_item_fn_mut(self, func); - self.current_path.pop(); - } + fn visit_item_fn_mut(& mut self, func : & mut syn::ItemFn) { self.current_path + .push(func.sig.ident.to_string()); syn::visit_mut::visit_item_fn_mut(self, func); + self.current_path.pop(); } }; (visit_impl_item_method_mut) => { - fn visit_impl_item_method_mut(&mut self, method: &mut syn::ImplItemMethod) { - self.current_path.push(method.sig.ident.to_string()); - syn::visit_mut::visit_impl_item_method_mut(self, method); - self.current_path.pop(); - } + fn visit_impl_item_method_mut(& mut self, method : & mut syn::ImplItemMethod) { + self.current_path.push(method.sig.ident.to_string()); + syn::visit_mut::visit_impl_item_method_mut(self, method); self.current_path + .pop(); } }; 
(visit_item_impl_mut) => { - fn visit_item_impl_mut(&mut self, item: &mut syn::ItemImpl) { - self.current_path - .push(item.self_ty.clone().into_token_stream().to_string()); - syn::visit_mut::visit_item_impl_mut(self, item); - self.current_path.pop(); - } + fn visit_item_impl_mut(& mut self, item : & mut syn::ItemImpl) { self + .current_path.push(item.self_ty.clone().into_token_stream().to_string()); + syn::visit_mut::visit_item_impl_mut(self, item); self.current_path.pop(); } }; (visit_item_mod_mut) => { - fn visit_item_mod_mut(&mut self, module: &mut syn::ItemMod) { - self.current_path.push(module.ident.to_string()); - syn::visit_mut::visit_item_mod_mut(self, module); - self.current_path.pop(); - } + fn visit_item_mod_mut(& mut self, module : & mut syn::ItemMod) { self + .current_path.push(module.ident.to_string()); + syn::visit_mut::visit_item_mod_mut(self, module); self.current_path.pop(); } }; } - pub(crate) use tracking;