From d1179ff2ea9ecb38f56c330fc3ba638a72185265 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Fri, 31 Dec 2021 16:44:21 +0100 Subject: [PATCH] hello world! --- clippy.toml | 5 +- src/ast.rs | 2 +- src/bytecode.rs | 19 ++----- src/compile.rs | 29 +++++----- src/gc.rs | 138 +++++++++++++++++++++++++++++--------------- src/lex.rs | 2 +- src/lib.rs | 9 ++- src/parse.rs | 12 ++-- src/vm.rs | 148 ++++++++++++++++++++++++++++++++++++++++++++---- test.dil | 11 +--- 10 files changed, 263 insertions(+), 112 deletions(-) diff --git a/clippy.toml b/clippy.toml index 77fc7e2..f46ff6c 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,5 +1,4 @@ disallowed-types = [ - { path = "std::collections::HashMap", reason = "may be fxhash or siphash, depending on the feature, stay flexible" }, - { path = "std::collections::HashSet", reason = "may be fxhash or siphash, depending on the feature, stay flexible" }, - { path = "std::collections::Vec", reason = "we generally want to use bumpalos collections" }, + "std::collections::HashMap", + "std::collections::HashSet", ] diff --git a/src/ast.rs b/src/ast.rs index 5c7d02e..38a916b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -112,7 +112,7 @@ impl Expr<'_> { #[derive(Debug, PartialEq)] pub enum Literal<'ast> { - String(&'ast str, Span), + String(Symbol, Span), Number(f64, Span), Array(Vec<'ast, Expr<'ast>>, Span), Object(Span), diff --git a/src/bytecode.rs b/src/bytecode.rs index fcad165..6b57deb 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,26 +1,25 @@ //! The bytecode that is executed in the vm use crate::errors::Span; -use crate::gc::Symbol; -use crate::HashMap; +use crate::vm::Value; use bumpalo::collections::Vec; #[derive(Debug)] pub struct FnBlock<'bc> { - pub code: Vec<'bc, Instr<'bc>>, + pub code: Vec<'bc, Instr>, pub stack_sizes: Vec<'bc, usize>, pub spans: Vec<'bc, Span>, pub arity: u8, } #[derive(Debug, Clone, Copy)] -pub enum Instr<'bc> { +pub enum Instr { /// Store the current value on the stack to the stack location with the local offset `usize` Store(usize), /// Load the variable value from the local offset `usize` onto the stack Load(usize), /// Push a value onto the stack - PushVal(&'bc Value), + PushVal(Value), /// Negate the top value on the stack. Only works with numbers and booleans Neg, BinAdd, @@ -40,13 +39,3 @@ pub enum Instr<'bc> { /// Println the value on top of the stack Print, } - -#[derive(Debug)] -pub enum Value { - Null, - Bool(bool), - Num(f64), - String, - Array, - Object(HashMap), -} diff --git a/src/compile.rs b/src/compile.rs index dd1e887..e3eb4f7 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -4,10 +4,11 @@ use crate::ast::{ Assignment, BinaryOp, BinaryOpKind, Block, Call, Declaration, Expr, FnDecl, Ident, IfStmt, Literal, Program, Stmt, UnaryOp, WhileStmt, }; -use crate::bytecode::{FnBlock, Instr, Value}; +use crate::bytecode::{FnBlock, Instr}; use crate::errors::{CompilerError, Span}; use crate::gc::Symbol; -use crate::HashMap; +use crate::vm::Value; +use crate::{HashMap, RtAlloc}; use bumpalo::collections::Vec; use bumpalo::Bump; use std::cell::RefCell; @@ -49,23 +50,26 @@ impl Env<'_> { } #[derive(Debug)] -struct Compiler<'ast, 'bc> { +struct Compiler<'ast, 'bc, 'gc> { blocks: Vec<'bc, FnBlock<'bc>>, current_block: usize, bump: &'bc Bump, /// the current local variables that are in scope, only needed for compiling env: Rc>>, + rt: &'gc mut RtAlloc, } -pub fn compile<'bc>( - ast: &Program, +pub fn compile<'ast, 'bc, 'gc>( + ast: &'ast Program, bytecode_bump: &'bc Bump, + rt: &'gc mut RtAlloc, ) -> Result>, CompilerError> { let mut compiler = Compiler { blocks: Vec::new_in(bytecode_bump), current_block: 0, bump: bytecode_bump, env: Rc::new(RefCell::new(Default::default())), + rt, }; compiler.compile(ast)?; @@ -73,7 +77,7 @@ pub fn compile<'bc>( Ok(compiler.blocks) } -impl<'ast, 'bc> Compiler<'ast, 'bc> { +impl<'ast, 'bc, 'gc> Compiler<'ast, 'bc, 'gc> { fn compile(&mut self, ast: &'ast Program<'ast>) -> CResult<()> { let global_block = FnBlock { code: Vec::new_in(self.bump), @@ -199,7 +203,7 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> { fn compile_expr_literal(&mut self, lit: &Literal) -> CResult<()> { let value = match lit { - Literal::String(_str, _) => Value::String, + Literal::String(str, _) => Value::String(*str), Literal::Number(num, _) => Value::Num(*num), Literal::Array(vec, _) => { if vec.is_empty() { @@ -208,16 +212,12 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> { todo!() } } - Literal::Object(_) => Value::Object(HashMap::default()), + Literal::Object(_) => Value::Object(self.rt.alloc_obj(HashMap::default())), Literal::Boolean(bool, _) => Value::Bool(*bool), Literal::Null(_) => Value::Null, }; - self.push_instr( - Instr::PushVal(self.bump.alloc(value)), - StackChange::Grow, - lit.span(), - ); + self.push_instr(Instr::PushVal(value), StackChange::Grow, lit.span()); Ok(()) } @@ -232,7 +232,6 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> { } fn compile_expr_binary(&mut self, binary: &BinaryOp) -> CResult<()> { - // todo: is this the correct ordering? self.compile_expr(&binary.lhs)?; self.compile_expr(&binary.rhs)?; @@ -267,7 +266,7 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> { *block.stack_sizes.last().expect("empty stack") - 1 } - fn push_instr(&mut self, instr: Instr<'bc>, stack_change: StackChange, span: Span) { + fn push_instr(&mut self, instr: Instr, stack_change: StackChange, span: Span) { let block = &mut self.blocks[self.current_block]; let stack_top = block.stack_sizes.last().copied().unwrap_or(0); let new_stack_top = stack_top as isize + stack_change as isize; diff --git a/src/gc.rs b/src/gc.rs index 1f178ea..5e1efbd 100644 --- a/src/gc.rs +++ b/src/gc.rs @@ -1,6 +1,7 @@ #![allow(dead_code)] -use crate::HashSet; +use crate::vm::Value; +use crate::{HashMap, HashSet}; use std::collections::LinkedList; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; @@ -41,12 +42,91 @@ impl Clone for Gc { impl Copy for Gc {} -/// An interned String. Hashing and Equality are O(1) and just look at the pointer address +/// An reference to an interned String. Hashing and Equality are O(1) and just look at the pointer address #[derive(Clone, Copy)] pub struct Symbol { gc: Gc, } +type ObjectMap = HashMap; + +/// A reference to an Object on the heap. +/// ```js +/// let x = {}; +/// ``` +/// This is inside the local x now. +#[derive(Clone, Copy)] +pub struct Object { + gc: Gc, +} + +#[derive(Debug)] +#[repr(C)] +struct HeapObject { + kind: HeapObjectKind, +} + +#[derive(Debug)] +enum HeapObjectKind { + String(Gc), + Object(ObjectMap), +} + +#[derive(Debug)] +pub struct RtAlloc { + symbols: HashSet>, + objects: LinkedList, +} + +impl RtAlloc { + /// # Safety + /// Promise to not forget to mark any roots and to not deref `Gc` after you've dropped me 🥺 + pub unsafe fn new() -> Self { + Self { + symbols: HashSet::default(), + objects: LinkedList::new(), + } + } + + fn alloc_str(&mut self, str: &str) -> Gc { + let ptr = Box::into_raw(str.to_owned().into_boxed_str()); + // SAFETY: Box cannot be null + let new_nonnull = unsafe { NonNull::new_unchecked(ptr) }; + let gc = Gc { ptr: new_nonnull }; + let object = HeapObject { + kind: HeapObjectKind::String(gc), + }; + + self.objects.push_back(object); + + gc + } + + pub fn alloc_obj(&mut self, obj: ObjectMap) -> Object { + self.objects.push_back(HeapObject { + kind: HeapObjectKind::Object(obj), + }); + + let ptr = self.objects.back().unwrap(); + + Object { + gc: Gc { + ptr: NonNull::from(ptr), + }, + } + } + + pub fn intern_string(&mut self, str: &str) -> Symbol { + let original_nonnull = NonNull::from(str); + + if let Some(interned) = self.symbols.get(&original_nonnull) { + Symbol::new(Gc { ptr: *interned }) + } else { + Symbol::new(self.alloc_str(str)) + } + } +} + impl Symbol { pub fn new(gc: Gc) -> Self { Self { gc } @@ -89,53 +169,19 @@ impl Debug for Symbol { } } -#[derive(Debug)] -struct Object { - kind: ObjectKind, -} +impl Deref for Object { + type Target = ObjectMap; -#[derive(Debug)] -enum ObjectKind { - String(Gc), -} - -#[derive(Debug)] -pub struct RtAlloc { - symbols: HashSet>, - objects: LinkedList, -} - -impl RtAlloc { - /// # Safety - /// Promise to not forget to mark any roots and to not deref `Gc` after you've dropped me 🥺 - pub unsafe fn new() -> Self { - Self { - symbols: HashSet::default(), - objects: LinkedList::new(), + fn deref(&self) -> &Self::Target { + match self.gc.deref().kind { + HeapObjectKind::Object(ref map) => map, + _ => unreachable!(), } } +} - fn alloc_str(&mut self, str: &str) -> Gc { - let ptr = Box::into_raw(str.to_owned().into_boxed_str()); - // SAFETY: Box cannot be null - let new_nonnull = unsafe { NonNull::new_unchecked(ptr) }; - let gc = Gc { ptr: new_nonnull }; - let object = Object { - kind: ObjectKind::String(gc.clone()), - }; - - self.objects.push_back(object); - - gc - } - - pub fn intern_string(&mut self, str: &str) -> Symbol { - let original_nonnull = NonNull::from(str); - - if let Some(interned) = self.symbols.get(&original_nonnull) { - return Symbol::new(Gc { ptr: *interned }); - } - - Symbol::new(self.alloc_str(str)) +impl Debug for Object { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.gc.deref().fmt(f) } } diff --git a/src/lex.rs b/src/lex.rs index 8597999..3065253 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -128,7 +128,7 @@ impl<'code, 'gc> Lexer<'code, 'gc> { .unwrap_or(false) } - fn maybe_next_char<'a>( + fn maybe_next_char( &mut self, expect_char: char, true_type: TokenKind, diff --git a/src/lib.rs b/src/lib.rs index eeb89e2..ea3a948 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,18 +45,21 @@ pub fn run_program(program: &str) { } } -fn process_ast(program: &str, ast: Program, runtime: RtAlloc) { +fn process_ast(program: &str, ast: Program, mut runtime: RtAlloc) { println!("AST:\n{:?}\n", ast); let bytecode_alloc = Bump::new(); - let bytecode = compile::compile(&ast, &bytecode_alloc); + let bytecode = compile::compile(&ast, &bytecode_alloc, &mut runtime); match bytecode { Ok(code) => { println!("Bytecode:\n{:#?}\n", code); - let _result_lol = vm::execute(&code, runtime); + let result = vm::execute(&code, runtime); + if let Err(result) = result { + eprintln!("error: {}", result); + } } Err(err) => errors::display_error(program, err), } diff --git a/src/parse.rs b/src/parse.rs index a9790ab..4324968 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -525,10 +525,7 @@ where let next = self.next().ok_or_else(|| CompilerError::eof("primary"))?; let return_expr = match next.kind { - TokenKind::String(literal) => Ok(Expr::Literal(Literal::String( - self.bump.alloc_str(&literal), - next.span, - ))), + TokenKind::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))), TokenKind::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), TokenKind::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))), TokenKind::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))), @@ -612,13 +609,14 @@ where let expr = parser(self)?; elements.push(expr); - while self.peek_kind().ok_or_else(|| { + let reached_eof = || { CompilerError::new( Span::dummy(), format!("reached EOF expecting `{:?}`", close.clone()), ) - })? != &close - { + }; + + while self.peek_kind().ok_or_else(reached_eof)? != &close { self.expect(TokenKind::Comma)?; // trailing comma support diff --git a/src/vm.rs b/src/vm.rs index a3407a3..43e2227 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,20 +1,146 @@ -use crate::bytecode::FnBlock; -use crate::gc::RtAlloc; +use crate::bytecode::{FnBlock, Instr}; +use crate::gc::{Object, RtAlloc, Symbol}; +use std::fmt::{Debug, Display, Formatter}; -type VmResult = Result<(), ()>; +type VmError = &'static str; +type VmResult = Result<(), VmError>; -pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), ()> { - let _vm = Vm { - blocks: bytecode, - current: bytecode.first().ok_or(())?, - alloc, +pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), VmError> { + let mut vm = Vm { + _blocks: bytecode, + current: bytecode.first().ok_or("no bytecode found")?, + pc: 0, + stack: Vec::with_capacity(1024 << 5), + _alloc: alloc, }; - Ok(()) + vm.execute_function() +} + +#[derive(Debug, Clone, Copy)] +pub enum Value { + Null, + Bool(bool), + Num(f64), + String(Symbol), + Array, + Object(Object), +} + +const _: () = _check_val_size(); +const fn _check_val_size() { + if std::mem::size_of::() != 24 { + panic!("value got bigger!"); + } } struct Vm<'bc> { - blocks: &'bc [FnBlock<'bc>], + _blocks: &'bc [FnBlock<'bc>], current: &'bc FnBlock<'bc>, - alloc: RtAlloc, + _alloc: RtAlloc, + pc: usize, + stack: Vec, +} + +impl<'bc> Vm<'bc> { + fn execute_function(&mut self) -> VmResult { + let code = &self.current.code; + + loop { + let instr = code.get(self.pc); + match instr { + Some(&instr) => self.dispatch_instr(instr)?, + None => return Ok(()), + } + self.pc += 1; + } + } + + fn dispatch_instr(&mut self, instr: Instr) -> VmResult { + match instr { + Instr::Store(index) => { + let val = self.stack.pop().unwrap(); + self.stack.insert(index, val); + } + Instr::Load(index) => self.stack.push(self.stack[index]), + Instr::PushVal(value) => self.stack.push(value), + Instr::Neg => { + let val = self.stack.pop().unwrap(); + match val { + Value::Bool(bool) => self.stack.push(Value::Bool(!bool)), + Value::Num(float) => self.stack.push(Value::Num(-float)), + _ => return Err(self.type_error()), + } + } + Instr::BinAdd => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Num(a), Value::Num(b)) => Ok(Value::Num(a + b)), + _ => Err("bad type"), + })?, + Instr::BinSub => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Num(a), Value::Num(b)) => Ok(Value::Num(a - b)), + _ => Err("bad type"), + })?, + Instr::BinMul => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Num(a), Value::Num(b)) => Ok(Value::Num(a * b)), + _ => Err("bad type"), + })?, + Instr::BinDiv => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Num(a), Value::Num(b)) => Ok(Value::Num(a / b)), + _ => Err("bad type"), + })?, + Instr::BinMod => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Num(a), Value::Num(b)) => Ok(Value::Num(a % b)), + _ => Err("bad type"), + })?, + Instr::BinAnd => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a && b)), + _ => Err("bad type"), + })?, + Instr::BinOr => self.bin_op(|lhs, rhs| match (lhs, rhs) { + (Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a || b)), + _ => Err("bad type"), + })?, + Instr::CmpGreater => todo!(), + Instr::CmpGreaterEq => todo!(), + Instr::CmpLess => todo!(), + Instr::CmpLessEq => todo!(), + Instr::CmpEq => todo!(), + Instr::CmpNotEq => todo!(), + Instr::Print => { + let val = self.stack.pop().unwrap(); + println!("{}", val); + } + } + + Ok(()) + } + + fn bin_op(&mut self, f: F) -> VmResult + where + F: FnOnce(Value, Value) -> Result, + { + let rhs = self.stack.pop().unwrap(); + let lhs = self.stack.pop().unwrap(); + + let result = f(lhs, rhs)?; + self.stack.push(result); + Ok(()) + } + + fn type_error(&self) -> VmError { + "bad type" + } +} + +impl Display for Value { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Value::Null => f.write_str("null"), + Value::Bool(bool) => Display::fmt(bool, f), + Value::Num(num) => Display::fmt(num, f), + Value::String(str) => f.write_str(str.as_str()), + Value::Array => todo!(), + Value::Object(_) => todo!(), + } + } } diff --git a/test.dil b/test.dil index cad3cf0..818947d 100644 --- a/test.dil +++ b/test.dil @@ -1,10 +1 @@ -let x = 2 * 3; - -let tz = x * 5; - -{ - let uwu = 5; - uwu; -} - -print 5 + 5; \ No newline at end of file +print "Hello World!"; \ No newline at end of file