diff --git a/rust2/Cargo.lock b/rust2/Cargo.lock index ee860bd..f0a73de 100644 --- a/rust2/Cargo.lock +++ b/rust2/Cargo.lock @@ -43,6 +43,7 @@ dependencies = [ "criterion", "insta", "owo-colors", + "rand", "tracing", "tracing-subscriber", ] @@ -258,6 +259,17 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "half" version = "1.8.2" @@ -470,6 +482,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -512,6 +530,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + [[package]] name = "rayon" version = "1.5.1" @@ -838,6 +886,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = 
"0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasm-bindgen" version = "0.2.80" diff --git a/rust2/Cargo.toml b/rust2/Cargo.toml index 73c390b..df577c4 100644 --- a/rust2/Cargo.toml +++ b/rust2/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" bumpalo = { version = "3.9.1", features = ["allocator_api"] } clap = { version = "3.1.9", features = ["derive"] } owo-colors = "3.3.0" +rand = "0.8.5" tracing = "0.1.34" tracing-subscriber = { version = "0.3.11", features = ["env-filter"] } diff --git a/rust2/src/lib.rs b/rust2/src/lib.rs index de2e840..841daa4 100644 --- a/rust2/src/lib.rs +++ b/rust2/src/lib.rs @@ -1,4 +1,5 @@ #![feature(allocator_api, let_else)] +#![feature(nonzero_ops)] #![deny(unsafe_op_in_unsafe_fn)] #![warn(rust_2018_idioms)] diff --git a/rust2/src/mir/mod.rs b/rust2/src/mir/mod.rs index 928b852..118782f 100644 --- a/rust2/src/mir/mod.rs +++ b/rust2/src/mir/mod.rs @@ -1,4 +1,18 @@ //! an experimental MIR (mid-level-ir) +//! +//! The MIR consists of two parts. First, there are instructions (`Stmt`). These instructions +//! can be seen as an extended version of the default brainfuck instruction set `+-<>,.[]`. +//! These instructions modify the classic tape. What MIR does is that it attaches an abstract +//! `MemoryState` to *each* statement. This state contains all facts known about the state of the +//! tape at the point of execution of the statement. +//! +//! For example, for the code `++.`, the `MemoryState` for the `.` instruction contains a single +//! fact: "The current cell was written to, by the instruction before and with the value 2". MIR +//! tracks as much of the reads/writes to determine their dependencies and eliminate as many +//! of them as possible. +//! +//! Note that MIR is always pessimized, so if it can't determine for sure that something is true, +//! it will not act on it. 
#![allow(dead_code)] mod opts; @@ -10,7 +24,7 @@ use bumpalo::Bump; use crate::{ hir::{Hir, StmtKind as HirStmtKind}, - mir::state::{MemoryState, Store, StoreInner}, + mir::state::{Load, MemoryState, Store}, parse::Span, BumpVec, }; @@ -48,8 +62,8 @@ enum StmtKind<'mir> { }, /// Left or Right pointer move (`<>`) PointerMove(i32), - Loop(Mir<'mir>), - Out, + Loop(Mir<'mir>, Load), + Out(Load), In(Store), SetN(u8, Store), } @@ -77,9 +91,9 @@ fn hir_to_mir<'mir>(alloc: &'mir Bump, hir: &Hir<'_>) -> Mir<'mir> { }, HirStmtKind::Right(n) => StmtKind::PointerMove(i32::try_from(n).unwrap()), HirStmtKind::Left(n) => StmtKind::PointerMove(-i32::try_from(n).unwrap()), - HirStmtKind::Loop(ref body) => StmtKind::Loop(hir_to_mir(alloc, body)), - HirStmtKind::Out => StmtKind::Out, - HirStmtKind::In => StmtKind::In(StoreInner::Unknown.into()), + HirStmtKind::Loop(ref body) => StmtKind::Loop(hir_to_mir(alloc, body), Load::Unknown), + HirStmtKind::Out => StmtKind::Out(Load::Unknown), + HirStmtKind::In => StmtKind::In(Store::unknown()), HirStmtKind::SetN(n) => StmtKind::SetN(n, Store::unknown()), }; Stmt { diff --git a/rust2/src/mir/opts.rs b/rust2/src/mir/opts.rs index af0effe..939c205 100644 --- a/rust2/src/mir/opts.rs +++ b/rust2/src/mir/opts.rs @@ -9,18 +9,18 @@ use crate::mir::{ /// this pass fills out as much state info for all statements as possible #[tracing::instrument(skip(alloc, mir))] pub fn passes<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) { - pass_get_state_info(alloc, mir); + pass_fill_state_info(alloc, mir); pass_const_propagation(mir); } /// this pass fills out as much state info for all statements as possible #[tracing::instrument(skip(alloc, mir))] -pub fn pass_get_state_info<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) { +pub fn pass_fill_state_info<'mir>(alloc: &'mir Bump, mir: &mut Mir<'mir>) { let empty_state = MemoryState::empty(alloc); - pass_get_state_info_inner(alloc, mir, empty_state); + pass_fill_state_info_inner(alloc, mir, empty_state); } 
#[tracing::instrument(skip(alloc, mir))] -fn pass_get_state_info_inner<'mir>( +fn pass_fill_state_info_inner<'mir>( alloc: &'mir Bump, mir: &mut Mir<'mir>, mut outer: MemoryState<'mir>, @@ -65,9 +65,9 @@ fn pass_get_state_info_inner<'mir>( StmtKind::PointerMove(n) => { MemoryState::single(alloc, outer, MemoryStateChange::Move(*n)) } - StmtKind::Loop(body) => { + StmtKind::Loop(body, _) => { // TODO: we can get a lot smarter here and get huge benefits; we don't yet - pass_get_state_info_inner(alloc, body, MemoryState::empty(alloc)); + pass_fill_state_info_inner(alloc, body, MemoryState::empty(alloc)); MemoryState::double( alloc, outer, @@ -80,7 +80,7 @@ fn pass_get_state_info_inner<'mir>( }, ) } - StmtKind::Out => outer, + StmtKind::Out(_) => outer, StmtKind::In(store) => MemoryState::single( alloc, outer, @@ -111,16 +111,16 @@ fn pass_const_propagation(mir: &mut Mir<'_>) { fn pass_const_propagation_inner(mir: &mut Mir<'_>) { for stmt in &mut mir.stmts { match &mut stmt.kind { - StmtKind::Out => { + StmtKind::Out(_) => { let state = stmt.state.state_for_offset(0); info!(?state, "We got the state of the output 😳😳😳"); // we could now insert a `SetN` before the `Out`, to mark the previous store // as dead. } - StmtKind::Loop(body) => { + StmtKind::Loop(body, _) => { let state = stmt.state.state_for_offset(0); info!(?state, "We got the state of the output 😳😳😳"); - // we could now insert a `SetN` before the `Out`, to mark the previous store + // we could now insert a `SetN` before the `Loop`, to mark the previous store // as dead. 
pass_const_propagation_inner(body); } diff --git a/rust2/src/mir/state.rs b/rust2/src/mir/state.rs index 2e16143..a56200e 100644 --- a/rust2/src/mir/state.rs +++ b/rust2/src/mir/state.rs @@ -3,6 +3,7 @@ use std::{ cell::{Cell, RefCell}, fmt::{Debug, Formatter}, + num::NonZeroU32, rc::Rc, }; @@ -10,25 +11,36 @@ use bumpalo::Bump; use crate::BumpVec; +/// The known state of a cell in the MIR #[derive(Debug, Clone)] pub enum CellState { + /// The state of this cell is completely unknown and could be anything, for example after `,` Unknown, + /// This cell is guaranteed to be `0` because a loop just terminated on it LoopNull, + /// Some value was written to this cell classified by the `Store`, but we do not know the value WrittenToUnknown(Store), + /// A known value was written to this cell WrittenToKnown(Store, u8), } /// A change in the known state of the memory caused by a single instruction #[derive(Debug, Clone)] pub enum MemoryStateChange { - /// A cell was changed + /// A cell value was changed to a new state. Change { offset: i32, new_state: CellState }, - /// The pointer was moved + /// The pointer was moved. This affects the `offset` calculations from previous states. Move(i32), - /// Forget everything + /// Forget everything about the memory state. This currently happens after each loop, since + /// the loop is opaque and might clobber everything. Forget, + /// Load a value from memory. This is not a direct change of the memory itself, but it does + /// change the state in that it marks the corresponding store, if any, as alive. Loads should + /// be eliminated whenever possible, to remove as many dead stores as possible. Load(Option), } +/// The known state of memory at a specific point in the instruction sequence #[derive(Clone)] pub struct MemoryState<'mir>(Rc>>); @@ -110,30 +122,77 @@ impl<'mir> MemoryStateInner<'mir> { } } +/// The abstract representation of a store in memory.
Corresponding loads can also hold +/// a reference to this to mark the store as alive #[derive(Clone)] pub struct Store(Rc>); impl Store { pub fn unknown() -> Self { - StoreInner::Unknown.into() + StoreKind::Unknown.into() + } + + pub fn id(&self) -> u64 { + self.inner().id + } + + pub fn add_load(&self) { + let old = self.inner(); + let new_kind = match old.kind { + StoreKind::Unknown => StoreKind::UsedAtLeast(NonZeroU32::new(1).unwrap()), + StoreKind::UsedExact(n) => StoreKind::UsedExact(n.checked_add(1).unwrap()), + StoreKind::UsedAtLeast(n) => StoreKind::UsedAtLeast(n.checked_add(1).unwrap()), + StoreKind::Dead => StoreKind::UsedExact(NonZeroU32::new(1).unwrap()), + }; + self.0.set(StoreInner { + id: old.id, + kind: new_kind, + }) + } + + fn inner(&self) -> StoreInner { + self.0.get() } } impl Debug for Store { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - self.0.get().fmt(f) + self.inner().fmt(f) } } #[derive(Debug, Clone, Copy)] -pub enum StoreInner { +struct StoreInner { + id: u64, + kind: StoreKind, +} + +#[derive(Debug, Clone, Copy)] +enum StoreKind { + /// No information is known about uses of the store Unknown, - Used(usize), + /// The exact number of subsequent loads is known about the store, and it's this + UsedExact(NonZeroU32), + /// The exact number of subsequent loads is not known about this store, but it's at least this + UsedAtLeast(NonZeroU32), + /// The store is known to be dead Dead, } -impl From for Store { - fn from(inner: StoreInner) -> Self { - Self(Rc::new(Cell::new(inner))) +impl From for Store { + fn from(kind: StoreKind) -> Self { + Self(Rc::new(Cell::new(StoreInner { + id: rand::random(), + kind, + }))) } } + +/// A load from memory and from which store it was acquired +#[derive(Debug, Clone)] +pub enum Load { + /// It is not known from which `Store` this was loaded + Unknown, + /// The load was acquired from this `Store`.
The `Store` must either be `UsedExact` or `UsedAtLeast` + KnownStore(Store), +} diff --git a/rust2/test.bf b/rust2/test.bf index 96d7a46..2d9515d 100644 --- a/rust2/test.bf +++ b/rust2/test.bf @@ -1 +1 @@ -[-]++<>->[-]<<>. \ No newline at end of file +++[<] \ No newline at end of file