diff --git a/src/ast.rs b/src/ast.rs index 6c3cb4b..5c7d02e 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -4,12 +4,12 @@ //! All AST nodes are bump allocated into the lifetime `'ast` use crate::errors::Span; -use crate::value::AstSymbol; +use crate::gc::Symbol; use bumpalo::collections::Vec; #[derive(Debug, PartialEq, Eq, Hash)] -pub struct Ident<'ast> { - pub sym: AstSymbol<'ast>, +pub struct Ident { + pub sym: Symbol, pub span: Span, } @@ -40,7 +40,7 @@ pub enum Stmt<'ast> { #[derive(Debug, PartialEq)] pub struct Declaration<'ast> { pub span: Span, - pub name: Ident<'ast>, + pub name: Ident, pub init: Expr<'ast>, } @@ -54,8 +54,8 @@ pub struct Assignment<'ast> { #[derive(Debug, PartialEq)] pub struct FnDecl<'ast> { pub span: Span, - pub name: Ident<'ast>, - pub params: Vec<'ast, Ident<'ast>>, + pub name: Ident, + pub params: Vec<'ast, Ident>, pub body: Block<'ast>, } @@ -91,7 +91,7 @@ pub struct WhileStmt<'ast> { #[derive(Debug, PartialEq)] pub enum Expr<'ast> { - Ident(Ident<'ast>), + Ident(Ident), Literal(Literal<'ast>), UnaryOp(&'ast UnaryOp<'ast>), BinaryOp(&'ast BinaryOp<'ast>), @@ -180,6 +180,6 @@ pub struct Call<'ast> { #[derive(Debug, PartialEq)] pub enum CallKind<'ast> { - Field(Ident<'ast>), + Field(Ident), Fn(Vec<'ast, Expr<'ast>>), } diff --git a/src/bytecode.rs b/src/bytecode.rs index 8f8a1bf..fcad165 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,7 +1,8 @@ //! The bytecode that is executed in the vm use crate::errors::Span; -use crate::value::{HashMap, NewSym}; +use crate::gc::Symbol; +use crate::HashMap; use bumpalo::collections::Vec; #[derive(Debug)] @@ -47,5 +48,5 @@ pub enum Value { Num(f64), String, Array, - Object(HashMap), + Object(HashMap), } diff --git a/src/compile.rs b/src/compile.rs index b6bd115..dd1e887 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -6,7 +6,8 @@ use crate::ast::{ }; use crate::bytecode::{FnBlock, Instr, Value}; use crate::errors::{CompilerError, Span}; -use crate::value::HashMap; +use crate::gc::Symbol; +use crate::HashMap; use bumpalo::collections::Vec; use bumpalo::Bump; use std::cell::RefCell; @@ -16,14 +17,14 @@ type CResult = Result; #[derive(Debug, Default)] struct Env<'ast> { - locals: HashMap<&'ast str, usize>, + locals: HashMap, outer: Option>>>, } impl Env<'_> { fn lookup_local(&self, name: &Ident) -> CResult { fn lookup_inner(env: &Env, name: &Ident) -> Option { - env.locals.get(name.sym).copied().or_else(|| { + env.locals.get(&name.sym).copied().or_else(|| { env.outer .as_ref() .map(|outer| lookup_inner(&outer.borrow(), name)) @@ -32,7 +33,10 @@ impl Env<'_> { } lookup_inner(self, name).ok_or_else(|| { - CompilerError::new(name.span, format!("variable {} not found", name.sym)) + CompilerError::new( + name.span, + format!("variable {} not found", name.sym.as_str()), + ) }) } diff --git a/src/value.rs b/src/gc.rs similarity index 54% rename from src/value.rs rename to src/gc.rs index fd684c7..41a4a28 100644 --- a/src/value.rs +++ b/src/gc.rs @@ -1,30 +1,12 @@ #![allow(dead_code)] +use crate::HashSet; use std::collections::LinkedList; use std::fmt::{Debug, Formatter}; +use std::hash::{Hash, Hasher}; use std::ops::Deref; use std::ptr::NonNull; -/// imagine interning or something here -pub type AstSymbol<'ast> = &'ast str; - -/// here is the actual interning or something -pub type NewSym = Gc; - -#[cfg(not(feature = "fxhash"))] -#[allow(clippy::disallowed_type)] -pub type HashMap = std::collections::HashMap; - -#[cfg(feature = "fxhash")] -pub type HashMap = rustc_hash::FxHashMap; - -#[cfg(not(feature = "fxhash"))] -#[allow(clippy::disallowed_type)] -pub type HashSet = std::collections::HashSet; - -#[cfg(feature = "fxhash")] -pub type HashSet = rustc_hash::FxHashSet; - /// A pointer to a garbage collected value. This pointer *must* always be valid, and a value /// is only allowed to be freed once no Gc is pointing at it anymore. This is achieved through /// tracing through all objects from a few known roots and marking every reachable value. All other @@ -59,35 +41,95 @@ impl Clone for Gc { impl Copy for Gc {} -enum Object { +/// An interned String. Hashing and Equality are O(1) and just look at the pointer address +#[derive(Debug, Clone, Copy)] +pub struct Symbol { + gc: Gc, +} + +impl Symbol { + pub fn new(gc: Gc) -> Self { + Self { gc } + } + + fn address(&self) -> usize { + self.gc.ptr.as_ptr() as *mut u8 as usize + } + + pub fn as_str(&self) -> &str { + self.gc.deref() + } +} + +impl Hash for Symbol { + fn hash(&self, state: &mut H) { + self.address().hash(state); + } +} + +impl PartialEq for Symbol { + fn eq(&self, other: &Self) -> bool { + self.address() == other.address() + } +} + +impl Eq for Symbol {} + +impl Deref for Symbol { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +#[derive(Debug)] +struct Object { + kind: ObjectKind, +} + +#[derive(Debug)] +enum ObjectKind { String(Gc), } +#[derive(Debug)] pub struct RtAlloc { symbols: HashSet>, objects: LinkedList, } impl RtAlloc { - pub fn alloc_str(&mut self, str: &str) -> Gc { + /// # Safety + /// Promise to not forget to mark any roots and to not deref `Gc` after you've dropped me 🥺 + pub unsafe fn new() -> Self { + Self { + symbols: HashSet::default(), + objects: LinkedList::new(), + } + } + + fn alloc_str(&mut self, str: &str) -> Gc { let ptr = Box::into_raw(str.to_owned().into_boxed_str()); // SAFETY: Box cannot be null let new_nonnull = unsafe { NonNull::new_unchecked(ptr) }; let gc = Gc { ptr: new_nonnull }; - let object = Object::String(gc.clone()); + let object = Object { + kind: ObjectKind::String(gc.clone()), + }; self.objects.push_back(object); gc } - pub fn intern_string(&mut self, str: &str) -> NewSym { + pub fn intern_string(&mut self, str: &str) -> Symbol { let original_nonnull = NonNull::from(str); if let Some(interned) = self.symbols.get(&original_nonnull) { - return Gc { ptr: *interned }; + return Symbol::new(Gc { ptr: *interned }); } - self.alloc_str(str) + Symbol::new(self.alloc_str(str)) } } diff --git a/src/lex.rs b/src/lex.rs index 4bc29f4..afaa035 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -5,6 +5,8 @@ //! is an iterator, and can therefore be used without any allocations use crate::errors::{CompilerError, Span}; +use crate::gc::Symbol; +use crate::RtAlloc; use std::iter::Peekable; use std::str::CharIndices; @@ -13,26 +15,26 @@ use std::str::CharIndices; /// /// For example `for`, `"hello"`, `main` or `.` #[derive(Debug, Clone)] -pub struct Token<'code> { +pub struct Token { pub span: Span, - pub kind: TokenKind<'code>, + pub kind: TokenKind, } -impl<'code> Token<'code> { - fn single_span(start: usize, kind: TokenKind<'code>) -> Token<'code> { +impl Token { + fn single_span(start: usize, kind: TokenKind) -> Token { Self { span: Span::single(start), kind, } } - fn new(span: Span, kind: TokenKind<'code>) -> Token<'code> { + fn new(span: Span, kind: TokenKind) -> Token { Self { span, kind } } } #[derive(Debug, Clone, PartialEq)] -pub enum TokenKind<'code> { +pub enum TokenKind { // keywords Let, Print, @@ -51,10 +53,10 @@ pub enum TokenKind<'code> { Or, Not, // literals - String(String), + String(Symbol), Number(f64), // ident - Ident(&'code str), + Ident(Symbol), // punctuation /// ; Semi, @@ -103,17 +105,19 @@ pub enum TokenKind<'code> { Error(Box), } -#[derive(Debug, Clone)] -pub struct Lexer<'code> { +#[derive(Debug)] +pub struct Lexer<'code, 'gc> { code: Peekable>, src: &'code str, + rt_alloc: &'gc mut RtAlloc, } -impl<'code> Lexer<'code> { - pub fn new(code: &'code str) -> Self { +impl<'code, 'gc> Lexer<'code, 'gc> { + pub fn new(code: &'code str, rt_alloc: &'gc mut RtAlloc) -> Self { Self { code: code.char_indices().peekable(), src: code, + rt_alloc, } } @@ -127,10 +131,10 @@ impl<'code> Lexer<'code> { fn maybe_next_char<'a>( &mut self, expect_char: char, - true_type: TokenKind<'a>, - false_type: TokenKind<'a>, + true_type: TokenKind, + false_type: TokenKind, start: usize, - ) -> Token<'a> { + ) -> Token { if self.expect(expect_char) { let _ = self.code.next(); // consume first one Token { @@ -144,10 +148,32 @@ impl<'code> Lexer<'code> { } } } + + fn keyword_or_ident(&mut self, name: &str) -> TokenKind { + match name { + "loop" => TokenKind::Loop, + "let" => TokenKind::Let, + "fn" => TokenKind::Fn, + "for" => TokenKind::For, + "false" => TokenKind::False, + "if" => TokenKind::If, + "else" => TokenKind::Else, + "while" => TokenKind::While, + "break" => TokenKind::Break, + "return" => TokenKind::Return, + "true" => TokenKind::True, + "null" => TokenKind::Null, + "not" => TokenKind::Not, + "and" => TokenKind::And, + "or" => TokenKind::Or, + "print" => TokenKind::Print, + _ => TokenKind::Ident(self.rt_alloc.intern_string(name)), + } + } } -impl<'code> Iterator for Lexer<'code> { - type Item = Token<'code>; +impl<'code, 'gc> Iterator for Lexer<'code, 'gc> { + type Item = Token; fn next(&mut self) -> Option { let token = loop { @@ -244,7 +270,10 @@ impl<'code> Iterator for Lexer<'code> { } } }; - break Token::new(Span::start_end(start, end), TokenKind::String(buffer)); + break Token::new( + Span::start_end(start, end), + TokenKind::String(self.rt_alloc.intern_string(&buffer)), + ); } char => { if char.is_ascii_digit() { @@ -295,7 +324,7 @@ impl<'code> Iterator for Lexer<'code> { }; break Token::new( Span::start_end(start, end), - keyword_or_ident(&self.src[start..end]), + self.keyword_or_ident(&self.src[start..end]), ); } else { break Token::new( @@ -316,28 +345,6 @@ impl<'code> Iterator for Lexer<'code> { } } -fn keyword_or_ident(name: &str) -> TokenKind { - match name { - "loop" => TokenKind::Loop, - "let" => TokenKind::Let, - "fn" => TokenKind::Fn, - "for" => TokenKind::For, - "false" => TokenKind::False, - "if" => TokenKind::If, - "else" => TokenKind::Else, - "while" => TokenKind::While, - "break" => TokenKind::Break, - "return" => TokenKind::Return, - "true" => TokenKind::True, - "null" => TokenKind::Null, - "not" => TokenKind::Not, - "and" => TokenKind::And, - "or" => TokenKind::Or, - "print" => TokenKind::Print, - _ => TokenKind::Ident(name), - } -} - fn is_valid_ident_part(char: char) -> bool { char.is_alphanumeric() || char == '_' } @@ -346,7 +353,7 @@ fn is_valid_ident_start(char: char) -> bool { char.is_alphabetic() || char == '_' } -#[cfg(test)] +#[cfg(test_ignore)] mod test { use crate::lex::Lexer; use crate::lex::TokenKind::{self, *}; diff --git a/src/lib.rs b/src/lib.rs index f0da6cf..3b42f15 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,29 +4,48 @@ mod ast; mod bytecode; mod compile; mod errors; +mod gc; mod lex; mod parse; -mod value; +mod vm; use crate::ast::Program; +use crate::gc::RtAlloc; pub use bumpalo::Bump; pub use lex::*; pub use parse::*; +#[cfg(not(feature = "fxhash"))] +#[allow(clippy::disallowed_type)] +type HashMap = std::collections::HashMap; + +#[cfg(feature = "fxhash")] +type HashMap = rustc_hash::FxHashMap; + +#[cfg(not(feature = "fxhash"))] +#[allow(clippy::disallowed_type)] +type HashSet = std::collections::HashSet; + +#[cfg(feature = "fxhash")] +type HashSet = rustc_hash::FxHashSet; + pub fn run_program(program: &str) { let ast_alloc = Bump::new(); - let lexer = lex::Lexer::new(program); + // SAFETY: I will try to 🥺 + let mut runtime = unsafe { RtAlloc::new() }; + + let lexer = lex::Lexer::new(program, &mut runtime); let ast = parse::parse(lexer, &ast_alloc); match ast { - Ok(ast) => process_ast(program, ast), + Ok(ast) => process_ast(program, ast, runtime), Err(err) => errors::display_error(program, err), } } -fn process_ast(program: &str, ast: Program) { +fn process_ast(program: &str, ast: Program, runtime: RtAlloc) { println!("AST:\n{:?}\n", ast); let bytecode_alloc = Bump::new(); @@ -34,7 +53,11 @@ fn process_ast(program: &str, ast: Program) { let bytecode = compile::compile(&ast, &bytecode_alloc); match bytecode { - Ok(code) => println!("Bytecode:\n{:#?}\n", code), + Ok(code) => { + println!("Bytecode:\n{:#?}\n", code); + + let _result_lol = vm::execute(&code, runtime); + } Err(err) => errors::display_error(program, err), } } diff --git a/src/parse.rs b/src/parse.rs index 72dc9ee..a9790ab 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -14,10 +14,9 @@ use bumpalo::Bump; use std::iter::Peekable; #[derive(Debug)] -struct Parser<'code, 'ast, I> +struct Parser<'ast, I> where - I: Iterator>, - I: 'code, + I: Iterator, { tokens: Peekable, depth: usize, @@ -26,8 +25,8 @@ where bump: &'ast Bump, } -pub fn parse<'ast, 'code>( - tokens: impl Iterator> + 'code, +pub fn parse<'lexer, 'ast>( + tokens: impl Iterator + 'lexer, ast_bump: &'ast Bump, ) -> Result, CompilerError> { let mut parser = Parser { @@ -72,10 +71,9 @@ macro_rules! enter_parse { }; } -impl<'code, 'ast, I> Parser<'code, 'ast, I> +impl<'ast, I> Parser<'ast, I> where - I: Iterator>, - I: 'code, + I: Iterator, { const MAX_DEPTH: usize = 100; @@ -189,7 +187,7 @@ where })) } - fn fn_args(&mut self) -> ParseResult>> { + fn fn_args(&mut self) -> ParseResult> { enter_parse!(self); self.expect(TokenKind::ParenO)?; @@ -542,8 +540,8 @@ where let _ = self.expect(TokenKind::ParenC)?; Ok(expr) } - TokenKind::Ident(name) => Ok(Expr::Ident(Ident { - sym: self.bump.alloc_str(name), + TokenKind::Ident(sym) => Ok(Expr::Ident(Ident { + sym, span: next.span, })), TokenKind::Error(error) => Err(*error), @@ -556,17 +554,14 @@ where return_expr } - fn ident(&mut self) -> ParseResult> { + fn ident(&mut self) -> ParseResult { enter_parse!(self); let Token { kind, span } = self .next() .ok_or_else(|| CompilerError::eof("identifier"))?; let return_expr = match kind { - TokenKind::Ident(name) => Ok(Ident { - sym: self.bump.alloc_str(name), - span, - }), + TokenKind::Ident(sym) => Ok(Ident { sym, span }), TokenKind::Error(error) => Err(*error), _ => { return Err(CompilerError::new( @@ -602,11 +597,7 @@ where return_expr } - fn parse_list( - &mut self, - close: TokenKind<'code>, - mut parser: F, - ) -> ParseResult> + fn parse_list(&mut self, close: TokenKind, mut parser: F) -> ParseResult> where F: FnMut(&mut Self) -> ParseResult, { @@ -645,19 +636,19 @@ where // token helpers - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option { self.tokens.next() } - fn peek(&mut self) -> Option<&Token<'code>> { + fn peek(&mut self) -> Option<&Token> { self.tokens.peek() } - fn peek_kind(&mut self) -> Option<&TokenKind<'code>> { + fn peek_kind(&mut self) -> Option<&TokenKind> { self.peek().map(|token| &token.kind) } - fn expect(&mut self, kind: TokenKind<'code>) -> ParseResult { + fn expect(&mut self, kind: TokenKind) -> ParseResult { if let Some(token) = self.next() { if token.kind == kind { Ok(token) diff --git a/src/vm.rs b/src/vm.rs new file mode 100644 index 0000000..a3407a3 --- /dev/null +++ b/src/vm.rs @@ -0,0 +1,20 @@ +use crate::bytecode::FnBlock; +use crate::gc::RtAlloc; + +type VmResult = Result<(), ()>; + +pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), ()> { + let _vm = Vm { + blocks: bytecode, + current: bytecode.first().ok_or(())?, + alloc, + }; + + Ok(()) +} + +struct Vm<'bc> { + blocks: &'bc [FnBlock<'bc>], + current: &'bc FnBlock<'bc>, + alloc: RtAlloc, +}