hello world!

This commit is contained in:
nora 2021-12-31 16:44:21 +01:00
parent 92b40b17ed
commit d1179ff2ea
10 changed files with 263 additions and 112 deletions

View file

@ -1,5 +1,4 @@
disallowed-types = [ disallowed-types = [
{ path = "std::collections::HashMap", reason = "may be fxhash or siphash, depending on the feature, stay flexible" }, "std::collections::HashMap",
{ path = "std::collections::HashSet", reason = "may be fxhash or siphash, depending on the feature, stay flexible" }, "std::collections::HashSet",
{ path = "std::collections::Vec", reason = "we generally want to use bumpalos collections" },
] ]

View file

@ -112,7 +112,7 @@ impl Expr<'_> {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Literal<'ast> { pub enum Literal<'ast> {
String(&'ast str, Span), String(Symbol, Span),
Number(f64, Span), Number(f64, Span),
Array(Vec<'ast, Expr<'ast>>, Span), Array(Vec<'ast, Expr<'ast>>, Span),
Object(Span), Object(Span),

View file

@ -1,26 +1,25 @@
//! The bytecode that is executed in the vm //! The bytecode that is executed in the vm
use crate::errors::Span; use crate::errors::Span;
use crate::gc::Symbol; use crate::vm::Value;
use crate::HashMap;
use bumpalo::collections::Vec; use bumpalo::collections::Vec;
#[derive(Debug)] #[derive(Debug)]
pub struct FnBlock<'bc> { pub struct FnBlock<'bc> {
pub code: Vec<'bc, Instr<'bc>>, pub code: Vec<'bc, Instr>,
pub stack_sizes: Vec<'bc, usize>, pub stack_sizes: Vec<'bc, usize>,
pub spans: Vec<'bc, Span>, pub spans: Vec<'bc, Span>,
pub arity: u8, pub arity: u8,
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum Instr<'bc> { pub enum Instr {
/// Store the current value on the stack to the stack location with the local offset `usize` /// Store the current value on the stack to the stack location with the local offset `usize`
Store(usize), Store(usize),
/// Load the variable value from the local offset `usize` onto the stack /// Load the variable value from the local offset `usize` onto the stack
Load(usize), Load(usize),
/// Push a value onto the stack /// Push a value onto the stack
PushVal(&'bc Value), PushVal(Value),
/// Negate the top value on the stack. Only works with numbers and booleans /// Negate the top value on the stack. Only works with numbers and booleans
Neg, Neg,
BinAdd, BinAdd,
@ -40,13 +39,3 @@ pub enum Instr<'bc> {
/// Println the value on top of the stack /// Println the value on top of the stack
Print, Print,
} }
#[derive(Debug)]
pub enum Value {
Null,
Bool(bool),
Num(f64),
String,
Array,
Object(HashMap<Symbol, Value>),
}

View file

@ -4,10 +4,11 @@ use crate::ast::{
Assignment, BinaryOp, BinaryOpKind, Block, Call, Declaration, Expr, FnDecl, Ident, IfStmt, Assignment, BinaryOp, BinaryOpKind, Block, Call, Declaration, Expr, FnDecl, Ident, IfStmt,
Literal, Program, Stmt, UnaryOp, WhileStmt, Literal, Program, Stmt, UnaryOp, WhileStmt,
}; };
use crate::bytecode::{FnBlock, Instr, Value}; use crate::bytecode::{FnBlock, Instr};
use crate::errors::{CompilerError, Span}; use crate::errors::{CompilerError, Span};
use crate::gc::Symbol; use crate::gc::Symbol;
use crate::HashMap; use crate::vm::Value;
use crate::{HashMap, RtAlloc};
use bumpalo::collections::Vec; use bumpalo::collections::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use std::cell::RefCell; use std::cell::RefCell;
@ -49,23 +50,26 @@ impl Env<'_> {
} }
#[derive(Debug)] #[derive(Debug)]
struct Compiler<'ast, 'bc> { struct Compiler<'ast, 'bc, 'gc> {
blocks: Vec<'bc, FnBlock<'bc>>, blocks: Vec<'bc, FnBlock<'bc>>,
current_block: usize, current_block: usize,
bump: &'bc Bump, bump: &'bc Bump,
/// the current local variables that are in scope, only needed for compiling /// the current local variables that are in scope, only needed for compiling
env: Rc<RefCell<Env<'ast>>>, env: Rc<RefCell<Env<'ast>>>,
rt: &'gc mut RtAlloc,
} }
pub fn compile<'bc>( pub fn compile<'ast, 'bc, 'gc>(
ast: &Program, ast: &'ast Program,
bytecode_bump: &'bc Bump, bytecode_bump: &'bc Bump,
rt: &'gc mut RtAlloc,
) -> Result<Vec<'bc, FnBlock<'bc>>, CompilerError> { ) -> Result<Vec<'bc, FnBlock<'bc>>, CompilerError> {
let mut compiler = Compiler { let mut compiler = Compiler {
blocks: Vec::new_in(bytecode_bump), blocks: Vec::new_in(bytecode_bump),
current_block: 0, current_block: 0,
bump: bytecode_bump, bump: bytecode_bump,
env: Rc::new(RefCell::new(Default::default())), env: Rc::new(RefCell::new(Default::default())),
rt,
}; };
compiler.compile(ast)?; compiler.compile(ast)?;
@ -73,7 +77,7 @@ pub fn compile<'bc>(
Ok(compiler.blocks) Ok(compiler.blocks)
} }
impl<'ast, 'bc> Compiler<'ast, 'bc> { impl<'ast, 'bc, 'gc> Compiler<'ast, 'bc, 'gc> {
fn compile(&mut self, ast: &'ast Program<'ast>) -> CResult<()> { fn compile(&mut self, ast: &'ast Program<'ast>) -> CResult<()> {
let global_block = FnBlock { let global_block = FnBlock {
code: Vec::new_in(self.bump), code: Vec::new_in(self.bump),
@ -199,7 +203,7 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> {
fn compile_expr_literal(&mut self, lit: &Literal) -> CResult<()> { fn compile_expr_literal(&mut self, lit: &Literal) -> CResult<()> {
let value = match lit { let value = match lit {
Literal::String(_str, _) => Value::String, Literal::String(str, _) => Value::String(*str),
Literal::Number(num, _) => Value::Num(*num), Literal::Number(num, _) => Value::Num(*num),
Literal::Array(vec, _) => { Literal::Array(vec, _) => {
if vec.is_empty() { if vec.is_empty() {
@ -208,16 +212,12 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> {
todo!() todo!()
} }
} }
Literal::Object(_) => Value::Object(HashMap::default()), Literal::Object(_) => Value::Object(self.rt.alloc_obj(HashMap::default())),
Literal::Boolean(bool, _) => Value::Bool(*bool), Literal::Boolean(bool, _) => Value::Bool(*bool),
Literal::Null(_) => Value::Null, Literal::Null(_) => Value::Null,
}; };
self.push_instr( self.push_instr(Instr::PushVal(value), StackChange::Grow, lit.span());
Instr::PushVal(self.bump.alloc(value)),
StackChange::Grow,
lit.span(),
);
Ok(()) Ok(())
} }
@ -232,7 +232,6 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> {
} }
fn compile_expr_binary(&mut self, binary: &BinaryOp) -> CResult<()> { fn compile_expr_binary(&mut self, binary: &BinaryOp) -> CResult<()> {
// todo: is this the correct ordering?
self.compile_expr(&binary.lhs)?; self.compile_expr(&binary.lhs)?;
self.compile_expr(&binary.rhs)?; self.compile_expr(&binary.rhs)?;
@ -267,7 +266,7 @@ impl<'ast, 'bc> Compiler<'ast, 'bc> {
*block.stack_sizes.last().expect("empty stack") - 1 *block.stack_sizes.last().expect("empty stack") - 1
} }
fn push_instr(&mut self, instr: Instr<'bc>, stack_change: StackChange, span: Span) { fn push_instr(&mut self, instr: Instr, stack_change: StackChange, span: Span) {
let block = &mut self.blocks[self.current_block]; let block = &mut self.blocks[self.current_block];
let stack_top = block.stack_sizes.last().copied().unwrap_or(0); let stack_top = block.stack_sizes.last().copied().unwrap_or(0);
let new_stack_top = stack_top as isize + stack_change as isize; let new_stack_top = stack_top as isize + stack_change as isize;

138
src/gc.rs
View file

@ -1,6 +1,7 @@
#![allow(dead_code)] #![allow(dead_code)]
use crate::HashSet; use crate::vm::Value;
use crate::{HashMap, HashSet};
use std::collections::LinkedList; use std::collections::LinkedList;
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
@ -41,12 +42,91 @@ impl<T: ?Sized> Clone for Gc<T> {
impl<T: ?Sized> Copy for Gc<T> {} impl<T: ?Sized> Copy for Gc<T> {}
/// An interned String. Hashing and Equality are O(1) and just look at the pointer address /// A reference to an interned String. Hashing and Equality are O(1) and just look at the pointer address
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub struct Symbol { pub struct Symbol {
gc: Gc<str>, gc: Gc<str>,
} }
/// The key/value storage of a heap object: a map from [`Symbol`] keys to [`Value`]s.
type ObjectMap = HashMap<Symbol, Value>;
/// A reference to an Object on the heap.
///
/// The handle is `Copy`: copying it copies the pointer, not the object it points to.
/// For example, after
/// ```js
/// let x = {};
/// ```
/// the local `x` holds an `Object` handle that points to the heap allocation.
#[derive(Clone, Copy)]
pub struct Object {
// Pointer to the heap allocation that backs this object.
gc: Gc<HeapObject>,
}
/// A single allocation tracked by [`RtAlloc`] in its `objects` list.
// NOTE(review): the reason for `#[repr(C)]` is not visible in this part of the file — confirm
// whether a layout guarantee is actually relied upon.
#[derive(Debug)]
#[repr(C)]
struct HeapObject {
// What this allocation holds.
kind: HeapObjectKind,
}
/// The payload of a [`HeapObject`].
#[derive(Debug)]
enum HeapObjectKind {
// Pointer to string data; `RtAlloc::alloc_str` records one of these per string it allocates.
String(Gc<str>),
// The map of an object; `RtAlloc::alloc_obj` records one of these per object it allocates.
Object(ObjectMap),
}
#[derive(Debug)]
pub struct RtAlloc {
symbols: HashSet<NonNull<str>>,
objects: LinkedList<HeapObject>,
}
impl RtAlloc {
/// # Safety
/// Promise to not forget to mark any roots and to not deref `Gc<T>` after you've dropped me 🥺
pub unsafe fn new() -> Self {
Self {
symbols: HashSet::default(),
objects: LinkedList::new(),
}
}
fn alloc_str(&mut self, str: &str) -> Gc<str> {
let ptr = Box::into_raw(str.to_owned().into_boxed_str());
// SAFETY: Box cannot be null
let new_nonnull = unsafe { NonNull::new_unchecked(ptr) };
let gc = Gc { ptr: new_nonnull };
let object = HeapObject {
kind: HeapObjectKind::String(gc),
};
self.objects.push_back(object);
gc
}
pub fn alloc_obj(&mut self, obj: ObjectMap) -> Object {
self.objects.push_back(HeapObject {
kind: HeapObjectKind::Object(obj),
});
let ptr = self.objects.back().unwrap();
Object {
gc: Gc {
ptr: NonNull::from(ptr),
},
}
}
pub fn intern_string(&mut self, str: &str) -> Symbol {
let original_nonnull = NonNull::from(str);
if let Some(interned) = self.symbols.get(&original_nonnull) {
Symbol::new(Gc { ptr: *interned })
} else {
Symbol::new(self.alloc_str(str))
}
}
}
impl Symbol { impl Symbol {
pub fn new(gc: Gc<str>) -> Self { pub fn new(gc: Gc<str>) -> Self {
Self { gc } Self { gc }
@ -89,53 +169,19 @@ impl Debug for Symbol {
} }
} }
#[derive(Debug)] impl Deref for Object {
struct Object { type Target = ObjectMap;
kind: ObjectKind,
}
#[derive(Debug)] fn deref(&self) -> &Self::Target {
enum ObjectKind { match self.gc.deref().kind {
String(Gc<str>), HeapObjectKind::Object(ref map) => map,
} _ => unreachable!(),
#[derive(Debug)]
pub struct RtAlloc {
symbols: HashSet<NonNull<str>>,
objects: LinkedList<Object>,
}
impl RtAlloc {
/// # Safety
/// Promise to not forget to mark any roots and to not deref `Gc<T>` after you've dropped me 🥺
pub unsafe fn new() -> Self {
Self {
symbols: HashSet::default(),
objects: LinkedList::new(),
} }
} }
}
fn alloc_str(&mut self, str: &str) -> Gc<str> { impl Debug for Object {
let ptr = Box::into_raw(str.to_owned().into_boxed_str()); fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
// SAFETY: Box cannot be null self.gc.deref().fmt(f)
let new_nonnull = unsafe { NonNull::new_unchecked(ptr) };
let gc = Gc { ptr: new_nonnull };
let object = Object {
kind: ObjectKind::String(gc.clone()),
};
self.objects.push_back(object);
gc
}
pub fn intern_string(&mut self, str: &str) -> Symbol {
let original_nonnull = NonNull::from(str);
if let Some(interned) = self.symbols.get(&original_nonnull) {
return Symbol::new(Gc { ptr: *interned });
}
Symbol::new(self.alloc_str(str))
} }
} }

View file

@ -128,7 +128,7 @@ impl<'code, 'gc> Lexer<'code, 'gc> {
.unwrap_or(false) .unwrap_or(false)
} }
fn maybe_next_char<'a>( fn maybe_next_char(
&mut self, &mut self,
expect_char: char, expect_char: char,
true_type: TokenKind, true_type: TokenKind,

View file

@ -45,18 +45,21 @@ pub fn run_program(program: &str) {
} }
} }
fn process_ast(program: &str, ast: Program, runtime: RtAlloc) { fn process_ast(program: &str, ast: Program, mut runtime: RtAlloc) {
println!("AST:\n{:?}\n", ast); println!("AST:\n{:?}\n", ast);
let bytecode_alloc = Bump::new(); let bytecode_alloc = Bump::new();
let bytecode = compile::compile(&ast, &bytecode_alloc); let bytecode = compile::compile(&ast, &bytecode_alloc, &mut runtime);
match bytecode { match bytecode {
Ok(code) => { Ok(code) => {
println!("Bytecode:\n{:#?}\n", code); println!("Bytecode:\n{:#?}\n", code);
let _result_lol = vm::execute(&code, runtime); let result = vm::execute(&code, runtime);
if let Err(result) = result {
eprintln!("error: {}", result);
}
} }
Err(err) => errors::display_error(program, err), Err(err) => errors::display_error(program, err),
} }

View file

@ -525,10 +525,7 @@ where
let next = self.next().ok_or_else(|| CompilerError::eof("primary"))?; let next = self.next().ok_or_else(|| CompilerError::eof("primary"))?;
let return_expr = match next.kind { let return_expr = match next.kind {
TokenKind::String(literal) => Ok(Expr::Literal(Literal::String( TokenKind::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))),
self.bump.alloc_str(&literal),
next.span,
))),
TokenKind::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), TokenKind::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))),
TokenKind::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))), TokenKind::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))),
TokenKind::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))), TokenKind::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))),
@ -612,13 +609,14 @@ where
let expr = parser(self)?; let expr = parser(self)?;
elements.push(expr); elements.push(expr);
while self.peek_kind().ok_or_else(|| { let reached_eof = || {
CompilerError::new( CompilerError::new(
Span::dummy(), Span::dummy(),
format!("reached EOF expecting `{:?}`", close.clone()), format!("reached EOF expecting `{:?}`", close.clone()),
) )
})? != &close };
{
while self.peek_kind().ok_or_else(reached_eof)? != &close {
self.expect(TokenKind::Comma)?; self.expect(TokenKind::Comma)?;
// trailing comma support // trailing comma support

148
src/vm.rs
View file

@ -1,20 +1,146 @@
use crate::bytecode::FnBlock; use crate::bytecode::{FnBlock, Instr};
use crate::gc::RtAlloc; use crate::gc::{Object, RtAlloc, Symbol};
use std::fmt::{Debug, Display, Formatter};
type VmResult = Result<(), ()>; type VmError = &'static str;
type VmResult = Result<(), VmError>;
pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), ()> { pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), VmError> {
let _vm = Vm { let mut vm = Vm {
blocks: bytecode, _blocks: bytecode,
current: bytecode.first().ok_or(())?, current: bytecode.first().ok_or("no bytecode found")?,
alloc, pc: 0,
stack: Vec::with_capacity(1024 << 5),
_alloc: alloc,
}; };
Ok(()) vm.execute_function()
}
/// A runtime value of the VM. It is `Copy`; the `String` and `Object` variants carry handles
/// rather than the data itself.
#[derive(Debug, Clone, Copy)]
pub enum Value {
// The `null` value.
Null,
// A boolean.
Bool(bool),
// A number, stored as a 64-bit float.
Num(f64),
// A string, represented by its interned symbol.
String(Symbol),
// An array; this variant carries no payload.
Array,
// A handle to an object on the heap.
Object(Object),
}
const _: () = _check_val_size();
const fn _check_val_size() {
if std::mem::size_of::<Value>() != 24 {
panic!("value got bigger!");
}
} }
struct Vm<'bc> { struct Vm<'bc> {
blocks: &'bc [FnBlock<'bc>], _blocks: &'bc [FnBlock<'bc>],
current: &'bc FnBlock<'bc>, current: &'bc FnBlock<'bc>,
alloc: RtAlloc, _alloc: RtAlloc,
pc: usize,
stack: Vec<Value>,
}
impl<'bc> Vm<'bc> {
fn execute_function(&mut self) -> VmResult {
let code = &self.current.code;
loop {
let instr = code.get(self.pc);
match instr {
Some(&instr) => self.dispatch_instr(instr)?,
None => return Ok(()),
}
self.pc += 1;
}
}
fn dispatch_instr(&mut self, instr: Instr) -> VmResult {
match instr {
Instr::Store(index) => {
let val = self.stack.pop().unwrap();
self.stack.insert(index, val);
}
Instr::Load(index) => self.stack.push(self.stack[index]),
Instr::PushVal(value) => self.stack.push(value),
Instr::Neg => {
let val = self.stack.pop().unwrap();
match val {
Value::Bool(bool) => self.stack.push(Value::Bool(!bool)),
Value::Num(float) => self.stack.push(Value::Num(-float)),
_ => return Err(self.type_error()),
}
}
Instr::BinAdd => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a + b)),
_ => Err("bad type"),
})?,
Instr::BinSub => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a - b)),
_ => Err("bad type"),
})?,
Instr::BinMul => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a * b)),
_ => Err("bad type"),
})?,
Instr::BinDiv => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a / b)),
_ => Err("bad type"),
})?,
Instr::BinMod => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a % b)),
_ => Err("bad type"),
})?,
Instr::BinAnd => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a && b)),
_ => Err("bad type"),
})?,
Instr::BinOr => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a || b)),
_ => Err("bad type"),
})?,
Instr::CmpGreater => todo!(),
Instr::CmpGreaterEq => todo!(),
Instr::CmpLess => todo!(),
Instr::CmpLessEq => todo!(),
Instr::CmpEq => todo!(),
Instr::CmpNotEq => todo!(),
Instr::Print => {
let val = self.stack.pop().unwrap();
println!("{}", val);
}
}
Ok(())
}
fn bin_op<F>(&mut self, f: F) -> VmResult
where
F: FnOnce(Value, Value) -> Result<Value, VmError>,
{
let rhs = self.stack.pop().unwrap();
let lhs = self.stack.pop().unwrap();
let result = f(lhs, rhs)?;
self.stack.push(result);
Ok(())
}
fn type_error(&self) -> VmError {
"bad type"
}
}
impl Display for Value {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Value::Null => f.write_str("null"),
Value::Bool(bool) => Display::fmt(bool, f),
Value::Num(num) => Display::fmt(num, f),
Value::String(str) => f.write_str(str.as_str()),
Value::Array => todo!(),
Value::Object(_) => todo!(),
}
}
} }

View file

@ -1,10 +1 @@
let x = 2 * 3; print "Hello World!";
let tz = x * 5;
{
let uwu = 5;
uwu;
}
print 5 + 5;