This commit is contained in:
nora 2022-04-17 18:39:06 +02:00
parent f88c486088
commit 039b5ea9c7
7 changed files with 156 additions and 27 deletions

54
rust2/Cargo.lock generated
View file

@ -43,6 +43,7 @@ dependencies = [
"criterion", "criterion",
"insta", "insta",
"owo-colors", "owo-colors",
"rand",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
] ]
@ -258,6 +259,17 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "getrandom"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]] [[package]]
name = "half" name = "half"
version = "1.8.2" version = "1.8.2"
@ -470,6 +482,12 @@ dependencies = [
"plotters-backend", "plotters-backend",
] ]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.4" version = "1.0.4"
@ -512,6 +530,36 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.5.1" version = "1.5.1"
@ -838,6 +886,12 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.80" version = "0.2.80"

View file

@ -9,6 +9,7 @@ edition = "2021"
bumpalo = { version = "3.9.1", features = ["allocator_api"] } bumpalo = { version = "3.9.1", features = ["allocator_api"] }
clap = { version = "3.1.9", features = ["derive"] } clap = { version = "3.1.9", features = ["derive"] }
owo-colors = "3.3.0" owo-colors = "3.3.0"
rand = "0.8.5"
tracing = "0.1.34" tracing = "0.1.34"
tracing-subscriber = { version = "0.3.11", features = ["env-filter"] } tracing-subscriber = { version = "0.3.11", features = ["env-filter"] }

View file

@ -1,4 +1,5 @@
#![feature(allocator_api, let_else)] #![feature(allocator_api, let_else)]
#![feature(nonzero_ops)]
#![deny(unsafe_op_in_unsafe_fn)] #![deny(unsafe_op_in_unsafe_fn)]
#![warn(rust_2018_idioms)] #![warn(rust_2018_idioms)]

View file

@ -1,4 +1,18 @@
//! an experimental MIR (mid-level-ir) //! an experimental MIR (mid-level-ir)
//!
//! The MIR consists of two parts. First, there are instructions (`Stmt`). These instructions
//! can be seen as an extended version of the default brainfuck instruction set `+-<>,.[]`.
//! These instructions modify the classic tape. What MIR does is that it attaches an abstract
//! `MemoryState` to *each* statement. This state contains all facts known about the state of the
//! tape at the point of execution of the statement.
//!
//! For example, for the code `++.`, the `MemoryState` for the `.` instruction contains a single
//! fact: "The current cell was written to by the previous instruction, with the value 2". MIR
//! tracks as many of the reads/writes as possible to determine their dependencies and eliminate
//! as many of them as it can.
//!
//! Note that MIR is always pessimistic (conservative): if it can't determine for sure that
//! something is true, it will not act on it.
#![allow(dead_code)] #![allow(dead_code)]
mod opts; mod opts;
@ -10,7 +24,7 @@ use bumpalo::Bump;
use crate::{ use crate::{
hir::{Hir, StmtKind as HirStmtKind}, hir::{Hir, StmtKind as HirStmtKind},
mir::state::{MemoryState, Store, StoreInner}, mir::state::{Load, MemoryState, Store},
parse::Span, parse::Span,
BumpVec, BumpVec,
}; };
@ -48,8 +62,8 @@ enum StmtKind<'mir> {
}, },
/// Left or Right pointer move (`<>`) /// Left or Right pointer move (`<>`)
PointerMove(i32), PointerMove(i32),
Loop(Mir<'mir>), Loop(Mir<'mir>, Load),
Out, Out(Load),
In(Store), In(Store),
SetN(u8, Store), SetN(u8, Store),
} }
@ -77,9 +91,9 @@ fn hir_to_mir<'mir>(alloc: &'mir Bump, hir: &Hir<'_>) -> Mir<'mir> {
}, },
HirStmtKind::Right(n) => StmtKind::PointerMove(i32::try_from(n).unwrap()), HirStmtKind::Right(n) => StmtKind::PointerMove(i32::try_from(n).unwrap()),
HirStmtKind::Left(n) => StmtKind::PointerMove(-i32::try_from(n).unwrap()), HirStmtKind::Left(n) => StmtKind::PointerMove(-i32::try_from(n).unwrap()),
HirStmtKind::Loop(ref body) => StmtKind::Loop(hir_to_mir(alloc, body)), HirStmtKind::Loop(ref body) => StmtKind::Loop(hir_to_mir(alloc, body), Load::Unknown),
HirStmtKind::Out => StmtKind::Out, HirStmtKind::Out => StmtKind::Out(Load::Unknown),
HirStmtKind::In => StmtKind::In(StoreInner::Unknown.into()), HirStmtKind::In => StmtKind::In(Store::unknown()),
HirStmtKind::SetN(n) => StmtKind::SetN(n, Store::unknown()), HirStmtKind::SetN(n) => StmtKind::SetN(n, Store::unknown()),
}; };
Stmt { Stmt {

View file

@ -9,18 +9,18 @@ use crate::mir::{
/// this pass fills out as much state info for all statements as possible /// this pass fills out as much state info for all statements as possible
#[tracing::instrument(skip(alloc, mir))] #[tracing::instrument(skip(alloc, mir))]
pub fn passes<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) { pub fn passes<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) {
pass_get_state_info(alloc, mir); pass_fill_state_info(alloc, mir);
pass_const_propagation(mir); pass_const_propagation(mir);
} }
/// this pass fills out as much state info for all statements as possible /// this pass fills out as much state info for all statements as possible
#[tracing::instrument(skip(alloc, mir))] #[tracing::instrument(skip(alloc, mir))]
pub fn pass_get_state_info<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) { pub fn pass_fill_state_info<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) {
let empty_state = MemoryState::empty(alloc); let empty_state = MemoryState::empty(alloc);
pass_get_state_info_inner(alloc, mir, empty_state); pass_fill_state_info_inner(alloc, mir, empty_state);
} }
#[tracing::instrument(skip(alloc, mir))] #[tracing::instrument(skip(alloc, mir))]
fn pass_get_state_info_inner<'mir>( fn pass_fill_state_info_inner<'mir>(
alloc: &'mir Bump, alloc: &'mir Bump,
mir: &mut Mir<'mir>, mir: &mut Mir<'mir>,
mut outer: MemoryState<'mir>, mut outer: MemoryState<'mir>,
@ -65,9 +65,9 @@ fn pass_get_state_info_inner<'mir>(
StmtKind::PointerMove(n) => { StmtKind::PointerMove(n) => {
MemoryState::single(alloc, outer, MemoryStateChange::Move(*n)) MemoryState::single(alloc, outer, MemoryStateChange::Move(*n))
} }
StmtKind::Loop(body) => { StmtKind::Loop(body, _) => {
// TODO: we can get a lot smarter here and get huge benefits; we don't yet // TODO: we can get a lot smarter here and get huge benefits; we don't yet
pass_get_state_info_inner(alloc, body, MemoryState::empty(alloc)); pass_fill_state_info_inner(alloc, body, MemoryState::empty(alloc));
MemoryState::double( MemoryState::double(
alloc, alloc,
outer, outer,
@ -80,7 +80,7 @@ fn pass_get_state_info_inner<'mir>(
}, },
) )
} }
StmtKind::Out => outer, StmtKind::Out(_) => outer,
StmtKind::In(store) => MemoryState::single( StmtKind::In(store) => MemoryState::single(
alloc, alloc,
outer, outer,
@ -111,16 +111,16 @@ fn pass_const_propagation(mir: &mut Mir<'_>) {
fn pass_const_propagation_inner(mir: &mut Mir<'_>) { fn pass_const_propagation_inner(mir: &mut Mir<'_>) {
for stmt in &mut mir.stmts { for stmt in &mut mir.stmts {
match &mut stmt.kind { match &mut stmt.kind {
StmtKind::Out => { StmtKind::Out(_) => {
let state = stmt.state.state_for_offset(0); let state = stmt.state.state_for_offset(0);
info!(?state, "We got the state of the output 😳😳😳"); info!(?state, "We got the state of the output 😳😳😳");
// we could now insert a `SetN` before the `Out`, to mark the previous store // we could now insert a `SetN` before the `Out`, to mark the previous store
// as dead. // as dead.
} }
StmtKind::Loop(body) => { StmtKind::Loop(body, _) => {
let state = stmt.state.state_for_offset(0); let state = stmt.state.state_for_offset(0);
info!(?state, "We got the state of the output 😳😳😳"); info!(?state, "We got the state of the output 😳😳😳");
// we could now insert a `SetN` before the `Out`, to mark the previous store // we could now insert a `SetN` before the `Loop`, to mark the previous store
// as dead. // as dead.
pass_const_propagation_inner(body); pass_const_propagation_inner(body);
} }

View file

@ -3,6 +3,7 @@
use std::{ use std::{
cell::{Cell, RefCell}, cell::{Cell, RefCell},
fmt::{Debug, Formatter}, fmt::{Debug, Formatter},
num::NonZeroU32,
rc::Rc, rc::Rc,
}; };
@ -10,25 +11,36 @@ use bumpalo::Bump;
use crate::BumpVec; use crate::BumpVec;
/// The known state of a cell in the MIR
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum CellState { pub enum CellState {
/// The state of this cell is completely unknown and could be anything, for example after `,`
Unknown, Unknown,
/// This cell is guaranteed to be `0` because a loop just terminated on it
LoopNull, LoopNull,
/// Some value was written to this cell classified by the `Store`, but we do not know the value
WrittenToUnknown(Store), WrittenToUnknown(Store),
/// A known value was written to this cell
WrittenToKnown(Store, u8), WrittenToKnown(Store, u8),
} }
/// A change in the known state of the memory caused by a single instruction /// A change in the known state of the memory caused by a single instruction
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum MemoryStateChange { pub enum MemoryStateChange {
/// A cell was changed /// A cell value was changed to a new state.
Change { offset: i32, new_state: CellState }, Change { offset: i32, new_state: CellState },
/// The pointer was moved /// The pointer was moved. This affects the `offset` calculations from previous states.
Move(i32), Move(i32),
/// Forget everything /// Forget everything about the memory state. This currently happens after each loop, since
/// the loop is opaque and might clobber everything.
Forget, Forget,
/// Load a value from memory. This is not a direct change of the memory itself, but it does
/// change the state in that it marks the corresponding store, if any, as alive. Loads should
/// be eliminated whenever possible, to remove as many dead stores as possible.
Load(Option<Store>),
} }
/// The known state of memory at a specific instance in the instruction sequence
#[derive(Clone)] #[derive(Clone)]
pub struct MemoryState<'mir>(Rc<RefCell<MemoryStateInner<'mir>>>); pub struct MemoryState<'mir>(Rc<RefCell<MemoryStateInner<'mir>>>);
@ -110,30 +122,77 @@ impl<'mir> MemoryStateInner<'mir> {
} }
} }
/// The abstract representation of a store in memory. Corresponding loads can also hold
/// a reference to this to mark the store as alive
#[derive(Clone)] #[derive(Clone)]
pub struct Store(Rc<Cell<StoreInner>>); pub struct Store(Rc<Cell<StoreInner>>);
impl Store { impl Store {
pub fn unknown() -> Self { pub fn unknown() -> Self {
StoreInner::Unknown.into() StoreKind::Unknown.into()
}
pub fn id(&self) -> u64 {
self.inner().id
}
/// Registers one more load that reads from this store.
///
/// The usage information is updated in place while the `id` is kept:
/// - `Unknown` becomes `UsedAtLeast(1)`
/// - `Dead` becomes `UsedExact(1)`
/// - `UsedExact(n)` / `UsedAtLeast(n)` keep their variant and have their count incremented
///
/// # Panics
///
/// Panics if incrementing the use count overflows `u32`.
pub fn add_load(&self) {
    let one = NonZeroU32::new(1).unwrap();
    let StoreInner { id, kind: previous } = self.inner();

    let kind = match previous {
        StoreKind::Unknown => StoreKind::UsedAtLeast(one),
        StoreKind::Dead => StoreKind::UsedExact(one),
        StoreKind::UsedExact(count) => StoreKind::UsedExact(count.checked_add(1).unwrap()),
        StoreKind::UsedAtLeast(count) => StoreKind::UsedAtLeast(count.checked_add(1).unwrap()),
    };

    // `StoreInner` lives in a `Cell`, so the whole value is replaced rather than mutated.
    self.0.set(StoreInner { id, kind });
}
fn inner(&self) -> StoreInner {
self.0.get()
} }
} }
impl Debug for Store { impl Debug for Store {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.get().fmt(f) self.inner().fmt(f)
} }
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum StoreInner { struct StoreInner {
id: u64,
kind: StoreKind,
}
#[derive(Debug, Clone, Copy)]
enum StoreKind {
/// No information is known about uses of the store
Unknown, Unknown,
Used(usize), /// The exact amount of subsequent loads is known about the store, and it's this
UsedExact(NonZeroU32),
/// The exact amount of subsequent loads is not known for this store, but it is at least this
UsedAtLeast(NonZeroU32),
/// The store is known to be dead
Dead, Dead,
} }
impl From<StoreInner> for Store { impl From<StoreKind> for Store {
fn from(inner: StoreInner) -> Self { fn from(kind: StoreKind) -> Self {
Self(Rc::new(Cell::new(inner))) Self(Rc::new(Cell::new(StoreInner {
id: rand::random(),
kind,
})))
} }
} }
/// A load from memory, together with the `Store` it was acquired from (if that is known).
#[derive(Debug, Clone)]
pub enum Load {
/// It is not known which `Store` this value was loaded from.
Unknown,
/// The value was loaded from this `Store`. The `Store` must be either `UsedExact` or
/// `UsedAtLeast`, since a store with a known load cannot be dead or of unknown use.
KnownStore(Store),
}

View file

@ -1 +1 @@
[-]++<>->[-]<<>. ++[<]