delete things

This commit is contained in:
nora 2022-04-23 17:13:32 +02:00
parent 5f9ca90fd5
commit 3f62892500
9 changed files with 6 additions and 2586 deletions

View file

@ -4,7 +4,8 @@
//! All AST nodes are bump allocated into the lifetime `'ast`
use crate::errors::Span;
use crate::gc::Symbol;
type Symbol = usize;
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]

View file

@ -1,138 +0,0 @@
//! The bytecode that is executed in the vm
//!
//! # Details
//!
//! ## Function Blocks
//! Every function is compiled into a bytecode block. These blocks are self-contained, and contain
//! all debug- and other information associated with it. The final bytecode is a collection of
//! these blocks.
//!
//! Note: Because of closures, function blocks have more required inputs than just the parameters,
//! but the compiler should handle that correctly.
//!
//! ## Local offsets
//! Variable offsets are calculated as `local offsets`. Local offsets are calculated relative to
//! the start of the space of the stack required by that function. The interpreter must keep track
//! of the stack start of each function, to be able to calculate these offsets.
//!
//! # Function calls
//! After the function returns, the interpreter resets its stack manually back
//! to the length before the call. This means the interpreter has to do some bookkeeping, but it has
//! to do that anyways.
//!
//! It is the compiler's job to generate the correct loading of the arguments and ensure that the arity
//! is correct before the `Call` instruction.
//!
//!
//! # ABI
//! Function arguments are passed on the stack and can be loaded just like local variables. They belong
//! to the stack frame of the new function and are cleaned up after returning, leaving the return value where
//! the stack frame was
//!
//! When a call happens, the current stack offset is pushed onto the stack as a `Value::Native` and
//! the element before it is stored as the new offset.
//! Then all parameters are pushed onto the stack, from first to last
//! Afterwards, execution of the code is started. A function always has to return, and compiler
//! inserts `return null` at the end of every function implicitly.
//!
//! If a return happens, the VM loads the current value on the stack. It then goes to the start
//! of the stack frame and saves the `Value::Native` that stores the old stack offset and loads that
//! into its stack offset. It then removes the whole stack frame from the stack, and pushes the
//! returned value.
//!
//! ```text
//! old stack offset─╮
//! ╭─Parameters─╮ │ old Function─╮ local─╮
//! v v v v v
//! ───────┬─────────┬──────────┬─────────────┬────────────┬──────────┬─────────╮
//! Num(6) │ Num(5) │ Num(6) │ NativeU(20) │ NativeU(4) │ Function │ Num(5) │
//! ───────┴─────────┴──────────┴─────────────┴────────────┴──────────┴─────────╯
//! ^ ╰────────────────────────────────────────────────────────────────── current stack frame
//! │ ^
//! ╰─ old local ╰─old PC
//!
//! ^
//! Vm ╰────────────╮
//! │
//! Current stack offset─╯
//!
//! ```
use crate::errors::Span;
use crate::vm::Value;
use bumpalo::collections::Vec;
use std::fmt::{Debug, Formatter};
/// This struct contains all data for a function.
///
/// `code`, `stack_sizes` and `spans` are parallel vectors: the entry at index `i` of each one
/// belongs to the instruction at index `i`. All three are allocated in the bump arena with the
/// lifetime `'bc`.
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]
pub struct FnBlock<'bc> {
/// The bytecode of the function
pub code: Vec<'bc, Instr>,
/// The sizes of the stack required by the function after the instruction at the same index.
/// This is only used during compilation to calculate local variable offsets.
pub stack_sizes: Vec<'bc, usize>,
/// The corresponding source code location of each instruction. This is debuginfo and only
/// used if there are errors.
pub spans: Vec<'bc, Span>,
/// How many parameters the function accepts.
/// Yes, it supports 4294967295 parameters. I dare you to overflow that.
pub arity: u32,
}
impl Debug for FnBlock<'_> {
    /// Prints only the instruction list; stack sizes, spans and arity are left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(&self.code, f)
    }
}
/// Index into the block list
///
/// A function value refers to its compiled code through this index rather than by reference.
pub type Function = usize;
/// A bytecode instruction. For more details on the structure of the bytecode,
/// read the module level docs [`bytecode`](`self`)
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]
pub enum Instr {
/// An operation that does nothing.
Nop,
/// Store the current value on the stack to the stack location with the local offset `usize`
Store(usize),
/// Load the variable value from the local offset `usize` onto the stack
Load(usize),
/// Push a value onto the stack
PushVal(Value),
/// Negate the top value on the stack. Only works with numbers and booleans
Neg,
// The binary operations. The `rhs` is on top of the stack, and `lhs` is below it
/// `lhs + rhs`
BinAdd,
/// `lhs - rhs`
BinSub,
/// `lhs * rhs`
BinMul,
/// `lhs / rhs`
BinDiv,
/// `lhs % rhs`
BinMod,
/// `lhs and rhs`
BinAnd,
/// `lhs or rhs`
BinOr,
/// `lhs > rhs`
CmpGreater,
/// `lhs >= rhs`
CmpGreaterEq,
/// `lhs < rhs`
CmpLess,
/// `lhs <= rhs`
CmpLessEq,
/// `lhs == rhs`
CmpEq,
/// `lhs != rhs`
CmpNotEq,
/// Println the value on top of the stack
Print,
/// If the current stack value is false, skip `isize` instructions. The offset is signed:
/// as emitted by the compiler, `0` continues with the next instruction and a negative
/// offset jumps backwards.
JmpFalse(isize),
/// Same as `JmpFalse`, but unconditional
Jmp(isize),
/// Calls the function at the top of the stack, after the parameters
Call,
/// Returns from the current function with the value on top of the stack, as described in
/// the `ABI` section of the module docs.
Return,
/// Shrinks the stack by `usize` elements, should always be emitted before backwards jumps
ShrinkStack(usize),
}

View file

@ -1,578 +0,0 @@
//! The compiler that compiles the AST down to bytecode
use crate::ast::{
Assignment, BinaryOp, BinaryOpKind, Block, Call, CallKind, Declaration, ElsePart, Expr, FnDecl,
Ident, IfStmt, Literal, Program, Stmt, UnaryOp, WhileStmt,
};
use crate::bytecode::{FnBlock, Instr};
use crate::errors::{CompilerError, Span};
use crate::gc::Symbol;
use crate::vm::Value;
use crate::{HashMap, RtAlloc};
use bumpalo::collections::Vec;
use bumpalo::Bump;
use std::cell::RefCell;
use std::rc::Rc;
/// Result type used by the compiler; the error is always a `CompilerError` and the success
/// value defaults to `()`.
type CResult<T = ()> = Result<T, CompilerError>;
/// Number of stack slots added on top of the parameter count when the initial stack size of a
/// function block is recorded.
// NOTE(review): presumably the calling-convention bookkeeping values described in the bytecode
// module's ABI docs - confirm against the VM.
const CALLCONV_OFFSET_DATA: usize = 3;
/// Describes how an `Env` relates to its outer environment.
#[derive(Debug, PartialEq, Eq)]
enum OuterEnvKind {
/// The outer environment is an enclosing block of the same function; lookups continue into it.
Block,
/// The outer environment belongs to an enclosing function; lookups currently stop here
/// because closures are not implemented yet.
Closure,
}
/// One lexical scope during compilation.
#[derive(Debug)]
struct Env {
/// Maps each variable declared in this scope to its local stack offset.
locals: HashMap<Symbol, usize>,
/// The enclosing scope, `None` for the outermost scope.
outer: Option<Rc<RefCell<Env>>>,
/// Whether `outer` is an enclosing block or an enclosing function.
outer_kind: OuterEnvKind,
}
impl Env {
fn lookup_local(&self, name: &Ident) -> CResult<usize> {
fn lookup_inner(env: &Env, name: &Ident) -> Option<usize> {
env.locals.get(&name.sym).copied().or_else(|| {
// TODO: closure handling lol 👀
if env.outer_kind == OuterEnvKind::Closure {
return None;
}
env.outer
.as_ref()
.and_then(|outer| lookup_inner(&outer.borrow(), name))
})
}
lookup_inner(self, name).ok_or_else(|| {
CompilerError::new(
name.span,
format!("variable {} not found", name.sym.as_str()),
)
})
}
fn new_inner(outer: Rc<RefCell<Self>>, outer_kind: OuterEnvKind) -> Rc<RefCell<Self>> {
Rc::new(RefCell::new(Self {
locals: HashMap::default(),
outer: Some(outer),
outer_kind,
}))
}
}
/// The state of one compilation run.
#[derive(Debug)]
struct Compiler<'bc, 'gc> {
/// All function blocks emitted so far; the first one pushed is the global block.
blocks: Vec<'bc, FnBlock<'bc>>,
/// Index into `blocks` of the block that instructions are currently appended to.
current_block_idx: usize,
/// Arena that the bytecode vectors are allocated in.
bump: &'bc Bump,
/// the current local variables that are in scope, only needed for compiling
env: Rc<RefCell<Env>>,
/// Runtime allocator, used to allocate object literals during compilation.
rt: &'gc mut RtAlloc,
/// How nested the current loop is, required for break offsets
loop_nesting: usize,
/// All break instructions currently in need of an offset. K=loop_nesting, V=break_indices
breaks: HashMap<usize, std::vec::Vec<usize>>,
}
/// Compiles `ast` into bytecode.
///
/// The returned slice lives in `bytecode_bump`; its first element is the block for the
/// top-level code, followed by one block per function declaration.
///
/// # Errors
/// Returns the first `CompilerError` encountered, for example an unknown variable.
pub fn compile<'ast, 'bc, 'gc>(
    ast: &'ast Program,
    bytecode_bump: &'bc Bump,
    rt: &'gc mut RtAlloc,
) -> Result<&'bc [FnBlock<'bc>], CompilerError> {
    // the outermost scope has no parent
    let global_env = Env {
        locals: HashMap::default(),
        outer: None,
        outer_kind: OuterEnvKind::Block,
    };

    let mut compiler = Compiler {
        blocks: Vec::new_in(bytecode_bump),
        current_block_idx: 0,
        bump: bytecode_bump,
        env: Rc::new(RefCell::new(global_env)),
        rt,
        loop_nesting: 0,
        breaks: HashMap::default(),
    };

    compiler.compile(ast)?;

    let blocks = compiler.blocks;
    Ok(blocks.into_bump_slice())
}
impl<'bc, 'gc> Compiler<'bc, 'gc> {
/// Compiles the whole program into a new global block (arity 0) and terminates it with an
/// implicit `return null`.
fn compile(&mut self, ast: &Program) -> CResult {
    let bump = self.bump;
    self.blocks.push(FnBlock {
        code: Vec::new_in(bump),
        stack_sizes: Vec::new_in(bump),
        spans: Vec::new_in(bump),
        arity: 0,
    });
    self.current_block_idx = self.blocks.len() - 1;

    self.compile_fn_body(ast)?;

    // exit the program with `return null`; there is no source location for these
    let push_null = Instr::PushVal(Value::Null);
    self.push_instr(push_null, StackChange::Grow, Span::dummy());
    self.push_instr(Instr::Return, StackChange::None, Span::dummy());

    Ok(())
}
/// Compiles the statements of `block` into the current function block, preceded by a `Nop`.
fn compile_fn_body(&mut self, block: &Block) -> CResult {
    // padding for backwards jumps
    let body_span = block.span;
    self.push_instr(Instr::Nop, StackChange::None, body_span);

    self.compile_stmts(&block.stmts)
}
/// Compiles the statements in order, stopping at the first one that fails.
fn compile_stmts(&mut self, stmts: &[Stmt]) -> CResult {
    stmts.iter().try_for_each(|stmt| match stmt {
        Stmt::Declaration(declaration) => self.compile_declaration(declaration),
        Stmt::Assignment(assignment) => self.compile_assignment(assignment),
        Stmt::FnDecl(decl) => self.compile_fn_decl(decl),
        Stmt::If(if_stmt) => self.compile_if(if_stmt),
        Stmt::Loop(body, span) => self.compile_loop(body, *span),
        Stmt::While(while_stmt) => self.compile_while(while_stmt),
        Stmt::Break(span) => self.compile_break(*span),
        Stmt::Return(value, span) => self.compile_return(value, *span),
        Stmt::Print(value, span) => self.compile_print(value, *span),
        Stmt::Block(block) => self.compile_block(block),
        Stmt::Expr(expr) => self.compile_expr(expr),
    })
}
/// Compiles a `let` declaration.
///
/// The initializer leaves its value on top of the stack; that slot simply becomes the
/// variable, so no extra instruction is emitted.
fn compile_declaration(&mut self, declaration: &Declaration) -> CResult {
    self.compile_expr(&declaration.init)?;

    // the value that was just pushed lives in the topmost slot
    let slot = self.current_stack_top();
    let mut scope = self.env.borrow_mut();
    scope.locals.insert(declaration.name.sym, slot);

    Ok(())
}
/// Compiles `lhs = rhs` where `lhs` is a plain identifier.
///
/// # Panics
/// Any other kind of assignment target is not implemented yet and hits `todo!()`.
fn compile_assignment(&mut self, assignment: &Assignment) -> CResult {
    let target = if let Expr::Ident(ident) = &assignment.lhs {
        ident
    } else {
        todo!()
    };

    // resolve the target before compiling the right-hand side
    let slot = self.env.borrow().lookup_local(target)?;
    self.compile_expr(&assignment.rhs)?;

    // `Store` consumes the value that the right-hand side pushed
    let store = Instr::Store(slot);
    self.push_instr(store, StackChange::Shrink, assignment.span);

    Ok(())
}
/// Compiles a function declaration into its own `FnBlock` and binds the function value to
/// the declared name in the enclosing scope.
///
/// While the body is compiled, the compiler switches to the new block and to a fresh scope
/// whose outer kind is `Closure`, so variables of enclosing functions are not visible. Loop
/// bookkeeping (`loop_nesting` and pending `breaks`) of the enclosing block is set aside as
/// well, because break indices refer to instructions of one specific block.
///
/// # Errors
/// Fails if the parameter count does not fit into the `u32` arity or if the body fails to
/// compile.
fn compile_fn_decl(&mut self, decl: &FnDecl) -> CResult {
    let arity: u32 = decl.params.len().try_into().map_err(|_| {
        CompilerError::new(
            // the first parameter that no longer fits is the one at index `u32::MAX`
            decl.params[u32::MAX as usize]
                .span
                .extend(decl.params.last().unwrap().span),
            "Too many parameters. How the fuck did you do this.".to_string(),
        )
    })?;

    let block = FnBlock {
        code: Vec::new_in(self.bump),
        stack_sizes: Vec::new_in(self.bump),
        spans: Vec::new_in(self.bump),
        arity,
    };

    // set the new block as the current block
    let new_block_idx = self.blocks.len();
    self.blocks.push(block);
    let old_block_idx = self.current_block_idx;
    self.current_block_idx = new_block_idx;

    // Loops of the enclosing block must not see `break`s of this function: their indices
    // point into this function's code, not into the enclosing block's.
    let outer_loop_nesting = std::mem::replace(&mut self.loop_nesting, 0);
    let outer_breaks = std::mem::take(&mut self.breaks);

    // compile the body with a captured environment
    let inner_env = Env::new_inner(self.env.clone(), OuterEnvKind::Closure);
    self.env = inner_env;

    {
        // insert params as locals
        let mut env_mut = self.env.borrow_mut();
        for (i, param) in decl.params.iter().enumerate() {
            env_mut.locals.insert(param.sym, i);
        }

        // The leading `Nop` is pushed by hand because the initial stack size is not zero:
        // it accounts for the parameters and the calling convention data.
        let block = &mut self.blocks[self.current_block_idx];
        block.code.push(Instr::Nop);
        block.spans.push(decl.span);
        block
            .stack_sizes
            .push(decl.params.len() + CALLCONV_OFFSET_DATA);
    }

    self.compile_stmts(&decl.body.stmts)?;

    // every function implicitly ends with `return null`
    self.push_instr(Instr::PushVal(Value::Null), StackChange::Grow, decl.span);
    self.push_instr(Instr::Return, StackChange::None, decl.span);

    // switch back to the enclosing scope, block and loop state
    let outer = self.env.borrow().outer.clone().expect("outer env got lost");
    self.env = outer;
    self.current_block_idx = old_block_idx;
    self.loop_nesting = outer_loop_nesting;
    self.breaks = outer_breaks;

    // save the function as a local variable
    self.push_instr(
        Instr::PushVal(Value::Function(new_block_idx)),
        StackChange::Grow,
        decl.span,
    );
    let stack_pos = self.current_stack_top();
    self.env
        .borrow_mut()
        .locals
        .insert(decl.name.sym, stack_pos);

    Ok(())
}
/// Compiles an `if` statement with an optional `else` / `else if` part.
///
/// Jump instructions are first emitted with a placeholder offset of `0` and patched once the
/// position of their target is known.
fn compile_if(&mut self, if_stmt: &IfStmt) -> CResult {
    /*
       0 PushVal (true)
       1 JmpFalse (2)
       2 // it is true
       4 Jmp (1)            this is optional, only for else
      >5 // it is false
      >7 // continue here
    */

    self.compile_expr(&if_stmt.cond)?;

    // the offset will be fixed later
    let cond_jmp_idx = self.push_instr(Instr::JmpFalse(0), StackChange::Shrink, if_stmt.span);

    self.compile_block(&if_stmt.body)?;

    match if_stmt.else_part {
        Some(else_part) => {
            // the `then` branch has to jump over the `else` branch
            let skip_else_idx = self.push_instr(Instr::Jmp(0), StackChange::None, if_stmt.span);

            // a false condition lands right behind that jump
            let to_else = self.forward_jmp_offset(cond_jmp_idx as isize);
            self.change_instr(cond_jmp_idx, Instr::JmpFalse(to_else));

            match else_part {
                ElsePart::Else(block, _) => {
                    self.compile_block(block)?;
                }
                ElsePart::ElseIf(if_stmt, _) => {
                    self.compile_if(if_stmt)?;
                }
            }

            let past_else = self.forward_jmp_offset(skip_else_idx as isize);
            self.change_instr(skip_else_idx, Instr::Jmp(past_else));
        }
        None => {
            let past_body = self.forward_jmp_offset(cond_jmp_idx as isize);
            self.change_instr(cond_jmp_idx, Instr::JmpFalse(past_body));
        }
    }

    Ok(())
}
/// Compiles an infinite `loop`: the body followed by an unconditional jump back to its start.
fn compile_loop(&mut self, ast_block: &Block, span: Span) -> CResult {
    /*
      >0 // do things
       1 JMP (-2),
    */

    let loop_start = self.code_len();
    let stack_size_before = self.current_stack_size();

    self.loop_nesting += 1;
    self.compile_block(ast_block)?;

    // drop the locals of the body so every iteration starts with the same stack size
    self.shrink_stack(stack_size_before, span);

    let back_offset = self.back_jmp_offset(loop_start);
    self.push_instr(Instr::Jmp(back_offset), StackChange::None, span);

    // patches the `break`s of this loop and leaves the nesting level
    self.end_loop();

    Ok(())
}
/// Compiles a `while` loop: the condition, a conditional exit jump, the body and a jump back
/// to the condition.
fn compile_while(&mut self, while_stmt: &WhileStmt) -> CResult {
    /*
      >0 PushVal (true)
       1 JmpFalse (2)
       2 // body
       3 Jmp (-3)
      >4 // continue here
    */

    let span = while_stmt.span;
    let cond_start = self.code_len();
    let stack_size_before = self.current_stack_size();

    self.loop_nesting += 1;

    self.compile_expr(&while_stmt.cond)?;

    // placeholder offset, patched below once the end of the loop is known
    let exit_jmp_idx = self.push_instr(Instr::JmpFalse(0), StackChange::Shrink, span);

    self.compile_block(&while_stmt.body)?;

    // drop the locals of the body before jumping back
    self.shrink_stack(stack_size_before, span);
    let back_offset = self.back_jmp_offset(cond_start);
    self.push_instr(Instr::Jmp(back_offset), StackChange::None, span);

    let exit_offset = self.forward_jmp_offset(exit_jmp_idx as isize);
    self.change_instr(exit_jmp_idx, Instr::JmpFalse(exit_offset));

    self.end_loop();

    Ok(())
}
/// Emits a placeholder jump for `break` and registers it under the current loop nesting
/// level; `end_loop` patches the offset when that loop is finished.
// NOTE(review): nothing here rejects a `break` outside of a loop - presumably the parser
// does; confirm.
fn compile_break(&mut self, span: Span) -> CResult {
    let placeholder_idx = self.push_instr(Instr::Jmp(0), StackChange::None, span);

    let pending = self.breaks.entry(self.loop_nesting).or_default();
    pending.push(placeholder_idx);

    Ok(())
}
/// Compiles `return`; a missing value returns `null`.
fn compile_return(&mut self, expr: &Option<Expr>, span: Span) -> CResult {
    match expr {
        Some(value) => self.compile_expr(value)?,
        None => {
            self.push_instr(Instr::PushVal(Value::Null), StackChange::Grow, span);
        }
    }

    self.push_instr(Instr::Return, StackChange::None, span);

    Ok(())
}
/// Compiles `print`: evaluates the expression, then emits `Print`, which takes the value
/// off the stack again.
fn compile_print(&mut self, expr: &Expr, span: Span) -> CResult {
    self.compile_expr(expr)?;

    let print = Instr::Print;
    self.push_instr(print, StackChange::Shrink, span);

    Ok(())
}
fn compile_block(&mut self, block: &Block) -> CResult {
let next_env = Env::new_inner(self.env.clone(), OuterEnvKind::Block);
self.env = next_env;
self.compile_stmts(&block.stmts)?;
let outer = self.env.borrow().outer.clone().expect("outer env got lost");
self.env = outer;
Ok(())
}
/// Compiles an expression by dispatching on its kind. Afterwards the value of the expression
/// is on top of the stack.
fn compile_expr(&mut self, expr: &Expr) -> CResult {
    match expr {
        Expr::Ident(ident) => self.compile_expr_ident(ident),
        Expr::Literal(literal) => self.compile_expr_literal(literal),
        Expr::UnaryOp(unary) => self.compile_expr_unary(unary),
        Expr::BinaryOp(binary) => self.compile_expr_binary(binary),
        Expr::Call(call) => self.compile_expr_call(call),
    }
}
/// Compiles a variable read: loads the value from the variable's local offset.
///
/// # Errors
/// Fails if the variable is not in scope.
fn compile_expr_ident(&mut self, name: &Ident) -> CResult {
    let slot = self.env.borrow().lookup_local(name)?;

    let load = Instr::Load(slot);
    self.push_instr(load, StackChange::Grow, name.span);

    Ok(())
}
/// Compiles a literal into a single `PushVal`.
///
/// Object literals are allocated as empty objects through the runtime allocator at compile
/// time.
///
/// # Panics
/// Non-empty array literals are not implemented yet and hit `todo!()`.
fn compile_expr_literal(&mut self, lit: &Literal) -> CResult {
    let value = match lit {
        Literal::Null(_) => Value::Null,
        Literal::Boolean(boolean, _) => Value::Bool(*boolean),
        Literal::Number(number, _) => Value::Num(*number),
        Literal::String(string, _) => Value::String(*string),
        Literal::Array(elements, _) if elements.is_empty() => Value::Array,
        Literal::Array(..) => todo!(),
        Literal::Object(_) => Value::Object(self.rt.alloc_obj(HashMap::default())),
    };

    self.push_instr(Instr::PushVal(value), StackChange::Grow, lit.span());

    Ok(())
}
/// Compiles a unary operation: the operand followed by `Neg`, which replaces the operand in
/// place, so the stack size stays the same.
fn compile_expr_unary(&mut self, unary: &UnaryOp) -> CResult {
    self.compile_expr(&unary.expr)?;

    // not and neg compile to the same instruction
    let negate = Instr::Neg;
    self.push_instr(negate, StackChange::None, unary.span);

    Ok(())
}
/// Compiles a binary operation: `lhs`, then `rhs`, then the operator instruction, which
/// replaces both operands with one result.
fn compile_expr_binary(&mut self, binary: &BinaryOp) -> CResult {
    // operand order matters: the instruction expects `rhs` on top and `lhs` below it
    self.compile_expr(&binary.lhs)?;
    self.compile_expr(&binary.rhs)?;

    let operator = match binary.kind {
        // arithmetic
        BinaryOpKind::Add => Instr::BinAdd,
        BinaryOpKind::Sub => Instr::BinSub,
        BinaryOpKind::Mul => Instr::BinMul,
        BinaryOpKind::Div => Instr::BinDiv,
        BinaryOpKind::Mod => Instr::BinMod,
        // logic
        BinaryOpKind::And => Instr::BinAnd,
        BinaryOpKind::Or => Instr::BinOr,
        // comparison
        BinaryOpKind::Equal => Instr::CmpEq,
        BinaryOpKind::NotEqual => Instr::CmpNotEq,
        BinaryOpKind::Greater => Instr::CmpGreater,
        BinaryOpKind::GreaterEqual => Instr::CmpGreaterEq,
        BinaryOpKind::Less => Instr::CmpLess,
        BinaryOpKind::LessEqual => Instr::CmpLessEq,
    };

    self.push_instr(operator, StackChange::Shrink, binary.span);

    Ok(())
}
/// Compiles a call of a named function: the arguments from first to last, then the function
/// value, then `Call`.
///
/// # Errors
/// Fails if the callee is not in scope or an argument fails to compile.
///
/// # Panics
/// Calls that are not plain function calls, and callees that are not identifiers, are not
/// implemented yet and hit `todo!()`.
fn compile_expr_call(&mut self, call: &Call) -> CResult {
    let args = if let CallKind::Fn(params) = &call.kind {
        params
    } else {
        todo!()
    };
    let callee = if let Expr::Ident(ident) = &call.callee {
        ident
    } else {
        todo!()
    };

    // resolve the callee first so an unknown function is reported before its arguments
    let callee_slot = self.env.borrow().lookup_local(callee)?;

    args.iter().try_for_each(|arg| self.compile_expr(arg))?;

    self.push_instr(Instr::Load(callee_slot), StackChange::Grow, call.span);
    self.push_instr(Instr::Call, StackChange::Grow, call.span);

    Ok(())
}
/// Emits a `ShrinkStack` that brings the tracked stack size back down to
/// `jmp_target_stack_size`. Nothing is emitted if the sizes already match.
fn shrink_stack(&mut self, jmp_target_stack_size: usize, span: Span) {
    let surplus = self.current_stack_size() - jmp_target_stack_size;

    if surplus != 0 {
        self.push_instr(
            Instr::ShrinkStack(surplus),
            StackChange::ShrinkN(surplus),
            span,
        );
    }
}
/// Finishes the innermost loop: every `break` registered for the current nesting level is
/// patched to jump to the current end of the code, then the nesting level is decremented.
fn end_loop(&mut self) {
    let pending = self
        .breaks
        .remove(&self.loop_nesting)
        .unwrap_or_default();

    for break_idx in pending {
        let offset = self.forward_jmp_offset(break_idx as isize);
        self.change_instr(break_idx, Instr::Jmp(offset));
    }

    self.loop_nesting -= 1;
}
/// Returns the local offset of the topmost stack slot of the current block.
///
/// # Panics
/// Panics with "empty stack" if no stack size has been recorded for the block yet.
fn current_stack_top(&self) -> usize {
    let sizes = &self.blocks[self.current_block_idx].stack_sizes;
    let size = *sizes.last().expect("empty stack");
    // we want the stack position, not the size, so the `- 1`
    size - 1
}
/// Computes the (negative) offset for a backwards jump to the instruction at index `target`.
///
/// The source is implicitly `self.code_len()`, i.e. the jump has to be the next instruction
/// that is pushed.
fn back_jmp_offset(&self, target: isize) -> isize {
    let source = self.code_len();
    let distance = source - target + 1;
    -distance
}
/// Computes the offset for a forward jump located at index `source`.
///
/// The target is implicitly `self.code_len()`, i.e. the next instruction that is pushed.
fn forward_jmp_offset(&self, source: isize) -> isize {
    let target = self.code_len();
    let distance = target - source;
    distance - 1
}
/// Number of instructions emitted into the current block so far, as `isize` because it is
/// used for signed jump offset arithmetic.
fn code_len(&self) -> isize {
    self.blocks[self.current_block_idx].code.len() as isize
}
/// The stack size after the most recently emitted instruction of the current block, or `0`
/// if the block is still empty.
fn current_stack_size(&self) -> usize {
    let sizes = &self.blocks[self.current_block_idx].stack_sizes;
    match sizes.last() {
        Some(&size) => size,
        None => 0,
    }
}
/// Overwrites the instruction at `index` of the current block; used to patch placeholder
/// jumps.
///
/// # Panics
/// Panics if `index` is out of bounds.
fn change_instr(&mut self, index: usize, instr: Instr) {
    self.blocks[self.current_block_idx].code[index] = instr;
}
/// Pushes an instruction and returns the index of the new instruction
///
/// Alongside the instruction, the stack size resulting from `stack_change` and the source
/// `span` are recorded at the same index.
///
/// # Panics
/// Panics if `stack_change` would make the tracked stack size negative.
fn push_instr(&mut self, instr: Instr, stack_change: StackChange, span: Span) -> usize {
    let block = &mut self.blocks[self.current_block_idx];

    let previous_size = match block.stack_sizes.last() {
        Some(&size) => size,
        None => 0,
    };
    let updated_size = previous_size as isize + stack_change.as_isize();
    assert!(updated_size >= 0, "instruction popped stack below 0");

    // the new instruction ends up at the current end of the code
    let instr_idx = block.code.len();

    block.code.push(instr);
    block.stack_sizes.push(updated_size as usize);
    block.spans.push(span);

    debug_assert_eq!(block.code.len(), block.stack_sizes.len());
    debug_assert_eq!(block.code.len(), block.spans.len());

    instr_idx
}
}
/// How an instruction changes the size of the stack, as tracked by the compiler.
#[derive(Debug, Copy, Clone)]
enum StackChange {
    /// The stack loses one element.
    Shrink,
    /// The stack size stays the same.
    None,
    /// The stack gains one element.
    Grow,
    /// The stack loses the given number of elements.
    ShrinkN(usize),
}

impl StackChange {
    /// The change as a signed element count: negative for shrinking, positive for growing.
    fn as_isize(&self) -> isize {
        match *self {
            StackChange::Grow => 1,
            StackChange::None => 0,
            StackChange::Shrink => -1,
            StackChange::ShrinkN(count) => -(count as isize),
        }
    }
}

230
src/gc.rs
View file

@ -1,230 +0,0 @@
//! The garbage collector for the language
//!
//! The structure of the GC might change, but for now it's simply a `LinkedList` of `Object`s.
use crate::vm::Value;
use crate::{HashMap, HashSet};
use std::collections::LinkedList;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::ptr::NonNull;
/// A pointer to a garbage collected value. This pointer *must* always be valid, and a value
/// is only allowed to be freed once no Gc is pointing at it anymore. This is achieved through
/// tracing through all objects from a few known roots and marking every reachable value. All other
/// values will be swept.
pub struct Gc<T: ?Sized> {
    ptr: NonNull<T>,
}

#[cfg(feature = "_debug")]
impl<T: ?Sized> dbg_pls::DebugPls for Gc<T> {
    fn fmt(&self, f: dbg_pls::Formatter<'_>) {
        todo!()
    }
}

impl<T: ?Sized> Deref for Gc<T> {
    type Target = T;

    /// Gives shared access to the pointed-to value.
    fn deref(&self) -> &Self::Target {
        // SAFETY: Gc will always point to a valid T, since T will only be freed once all Gc are gone
        // This requires tracing through *all* roots without forgetting any
        // I would guess that there will be some errors with the garbage collector, but once they are
        // all fixed this will be sound. But who knows.
        unsafe { self.ptr.as_ref() }
    }
}

impl<T: Debug + ?Sized> Debug for Gc<T> {
    /// Formats the pointed-to value; the pointer itself is not shown.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(&**self, f)
    }
}

impl<T: ?Sized> Clone for Gc<T> {
    /// Copies the pointer; the pointed-to value is not cloned.
    fn clone(&self) -> Self {
        *self
    }
}

impl<T: ?Sized> Copy for Gc<T> {}
/// A reference to an interned String. Hashing and Equality are O(1) and just look at the pointer address
#[derive(Clone, Copy)]
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]
pub struct Symbol {
// pointer to the interned string data
gc: Gc<str>,
}
/// The contents of an object on the heap: a map from property names to values.
type ObjectMap = HashMap<Symbol, Value>;
/// A reference to an Object on the heap.
/// ```js
/// let x = {};
/// ```
/// This is inside the local x now.
#[derive(Clone, Copy)]
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]
pub struct Object {
// pointer to the heap object; dereferencing an `Object` expects it to be of the `Object` kind
gc: Gc<HeapObject>,
}
/// A value that is stored in the object list of `RtAlloc`.
#[derive(Debug)]
#[repr(C)]
struct HeapObject {
// what kind of value this is, including its data
kind: HeapObjectKind,
}
/// The kinds of values that can be stored in a `HeapObject`.
#[derive(Debug)]
enum HeapObjectKind {
/// An allocated string; the string data itself lives in its own allocation.
String(Gc<str>),
/// An object with its properties.
Object(ObjectMap),
}
/// The runtime allocator: it interns strings and allocates objects.
#[derive(Debug)]
pub struct RtAlloc {
// all interned strings, compared and hashed by content; freed when the allocator is dropped
symbols: HashSet<NonNullStrWrapper>,
// every heap object allocated so far
objects: LinkedList<HeapObject>,
}
/// A raw string pointer that is hashed and compared by the *contents* of the string it
/// points to, so it can be used to look up interned strings by value.
#[derive(Debug)]
struct NonNullStrWrapper(NonNull<str>);

impl Hash for NonNullStrWrapper {
    /// Hashes the string contents, not the address.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // SAFETY: Assume the ptr is valid, same rules as `Gc<T>`
        let contents: &str = unsafe { self.0.as_ref() };
        contents.hash(state);
    }
}

impl PartialEq for NonNullStrWrapper {
    /// Compares the string contents, not the addresses.
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: Assume the ptr is valid, same rules as `Gc<T>`
        let (lhs, rhs): (&str, &str) = unsafe { (self.0.as_ref(), other.0.as_ref()) };
        lhs == rhs
    }
}

impl Eq for NonNullStrWrapper {}
impl RtAlloc {
/// # Safety
/// Promise to not forget to mark any roots and to not deref `Gc<T>` after you've dropped me 🥺
pub unsafe fn new() -> Self {
Self {
symbols: HashSet::default(),
objects: LinkedList::new(),
}
}
/// Copies `str` into a new heap allocation, registers it in the object list and returns a
/// pointer to the copy.
fn alloc_str(&mut self, str: &str) -> Gc<str> {
    let boxed: Box<str> = Box::from(str);
    let raw = Box::into_raw(boxed);
    // SAFETY: Box cannot be null
    let ptr = unsafe { NonNull::new_unchecked(raw) };

    let gc = Gc { ptr };
    self.objects.push_back(HeapObject {
        kind: HeapObjectKind::String(gc),
    });

    gc
}
/// Moves `obj` into the object list and returns a reference to it.
pub fn alloc_obj(&mut self, obj: ObjectMap) -> Object {
    let heap_object = HeapObject {
        kind: HeapObjectKind::Object(obj),
    };
    self.objects.push_back(heap_object);

    // the element that was just pushed is the last one, so the list cannot be empty
    let ptr = self.objects.back().map(NonNull::from).unwrap();

    Object { gc: Gc { ptr } }
}
/// Returns the symbol for `str`, allocating a copy only if no string with the same contents
/// has been interned before. Equal strings therefore always yield the same pointer.
pub fn intern_string(&mut self, str: &str) -> Symbol {
    // temporary key pointing at the caller's string, only used for the lookup
    let probe = NonNullStrWrapper(NonNull::from(str));
    let existing = self.symbols.get(&probe).map(|found| found.0);

    let ptr = match existing {
        Some(ptr) => ptr,
        None => {
            let allocated = self.alloc_str(str);
            self.symbols.insert(NonNullStrWrapper(allocated.ptr));
            allocated.ptr
        }
    };

    Symbol::new(Gc { ptr })
}
}
impl Drop for RtAlloc {
    /// Frees the string data of every interned string.
    fn drop(&mut self) {
        // free all interned strings
        for interned in self.symbols.iter() {
            // SAFETY: No one has freed these, see `Gc<T>`
            let owned: Box<str> = unsafe { Box::from_raw(interned.0.as_ptr()) };
            drop(owned);
        }
    }
}
impl Symbol {
pub fn new(gc: Gc<str>) -> Self {
Self { gc }
}
fn address(&self) -> usize {
self.gc.ptr.as_ptr() as *mut u8 as usize
}
pub fn as_str(&self) -> &str {
&*self.gc
}
}
impl Hash for Symbol {
    /// Hashes the address of the string, not its contents.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let address = self.address();
        Hash::hash(&address, state);
    }
}
impl PartialEq for Symbol {
    /// Two symbols are equal if they point at the same string data.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.address(), other.address());
        lhs == rhs
    }
}

impl Eq for Symbol {}
impl Deref for Symbol {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl Debug for Symbol {
    /// Formats the symbol like the string it refers to.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(self.as_str(), f)
    }
}
impl Deref for Object {
    type Target = ObjectMap;

    /// Gives access to the properties of the object.
    ///
    /// # Panics
    /// Hits `unreachable!()` if the heap object is not of the `Object` kind.
    fn deref(&self) -> &Self::Target {
        if let HeapObjectKind::Object(map) = &self.gc.kind {
            map
        } else {
            unreachable!()
        }
    }
}
impl Debug for Object {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.gc.deref().fmt(f)
}
}

View file

@ -1,535 +0,0 @@
//!
//! The lex module lexes the source code into Tokens
//!
//! For error handling, there is a single `Error` token, which contains the error. The lexer
//! is an iterator, and can therefore be used without any allocations
use crate::errors::{CompilerError, Span};
use crate::gc::Symbol;
use crate::RtAlloc;
use std::iter::Peekable;
use std::str::CharIndices;
///
/// A single token generated from the lexer
///
/// For example `for`, `"hello"`, `main` or `.`
#[derive(Debug, Clone)]
pub struct Token {
/// Where in the source code the token is located
pub span: Span,
/// What kind of token this is, including the payload of literals and identifiers
pub kind: TokenKind,
}
impl Token {
fn single_span(start: usize, kind: TokenKind) -> Token {
Self {
span: Span::single(start),
kind,
}
}
fn new(span: Span, kind: TokenKind) -> Token {
Self { span, kind }
}
}
/// The kind of a [`Token`]: a keyword, a literal, an identifier, punctuation or a lexing error.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
// keywords
Let,
Print,
Fn,
If,
Else,
Loop,
While,
For,
Break,
Return,
True,
False,
Null,
And,
Or,
Not,
// literals
/// A string literal; the payload is the interned contents with escapes already resolved
String(Symbol),
/// A number literal
Number(f64),
// ident
/// An identifier that is not a keyword; the payload is the interned name
Ident(Symbol),
// punctuation
/// ;
Semi,
/// +
Plus,
/// -
Minus,
/// *
Asterisk,
/// /
Slash,
/// %
Percent,
/// {
BraceO,
/// }
BraceC,
/// [
BracketO,
/// ]
BracketC,
/// (
ParenO,
/// )
ParenC,
/// .
Dot,
/// ,
Comma,
/// =
Equal,
/// ==
EqualEqual,
/// !=
BangEqual,
/// >
Greater,
/// <
Less,
/// >=
GreaterEqual,
/// <=
LessEqual,
/// An error occurred. It's boxed to save space, since `CompilerError` is > 6 `usize` big
Error(Box<CompilerError>),
}
/// The lexer. It is an iterator over the tokens of the source code.
#[derive(Debug)]
pub struct Lexer<'code, 'gc> {
// the remaining characters with their byte offsets; peekable for one character of lookahead
code: Peekable<CharIndices<'code>>,
// the complete source code, used for slicing out identifiers and for the length at EOF
src: &'code str,
// used to intern identifiers and string literals
rt_alloc: &'gc mut RtAlloc,
}
impl<'code, 'gc> Lexer<'code, 'gc> {
pub fn new(code: &'code str, rt_alloc: &'gc mut RtAlloc) -> Self {
Self {
code: code.char_indices().peekable(),
src: code,
rt_alloc,
}
}
/// Checks whether the next character is `expected`. The character is only peeked at, not
/// consumed; at the end of the input the answer is `false`.
fn expect(&mut self, expected: char) -> bool {
    matches!(self.code.peek(), Some(&(_, next)) if next == expected)
}
fn maybe_next_char(
&mut self,
expect_char: char,
true_type: TokenKind,
false_type: TokenKind,
start: usize,
) -> Token {
if self.expect(expect_char) {
let _ = self.code.next(); // consume first one
Token {
span: Span::start_len(start, start + 2),
kind: true_type,
}
} else {
Token {
span: Span::single(start),
kind: false_type,
}
}
}
/// Maps `name` to its keyword token, or interns it as an identifier if it is not a keyword.
fn keyword_or_ident(&mut self, name: &str) -> TokenKind {
    match name {
        "and" => TokenKind::And,
        "break" => TokenKind::Break,
        "else" => TokenKind::Else,
        "false" => TokenKind::False,
        "fn" => TokenKind::Fn,
        "for" => TokenKind::For,
        "if" => TokenKind::If,
        "let" => TokenKind::Let,
        "loop" => TokenKind::Loop,
        "not" => TokenKind::Not,
        "null" => TokenKind::Null,
        "or" => TokenKind::Or,
        "print" => TokenKind::Print,
        "return" => TokenKind::Return,
        "true" => TokenKind::True,
        "while" => TokenKind::While,
        identifier => TokenKind::Ident(self.rt_alloc.intern_string(identifier)),
    }
}
}
impl<'code, 'gc> Iterator for Lexer<'code, 'gc> {
    type Item = Token;

    /// Lexes the next token, or returns `None` once the input is exhausted.
    ///
    /// Whitespace and comments are skipped: `#` starts a comment that runs to the end of the
    /// line, `##` starts a comment that runs to the next `##`. Errors are not reported
    /// through a `Result` but as a token of the kind `TokenKind::Error`.
    fn next(&mut self) -> Option<Self::Item> {
        let token = loop {
            let (start, char) = self.code.next()?;
            match char {
                _ if char.is_whitespace() => {}
                '#' => {
                    // only peek so we don't skip the \n if the # is at the end
                    if let Some((_, '#')) = self.code.peek() {
                        let _ = self.code.next();
                        // multi-line comment: skip everything up to the closing `##` or EOF
                        loop {
                            if let Some((_, '#')) | None = self.code.next() {
                                if let Some((_, '#')) | None = self.code.next() {
                                    break;
                                }
                            }
                        }
                    } else {
                        // line comment: skip everything up to the end of the line or EOF
                        loop {
                            if let Some((_, '\n')) | None = self.code.next() {
                                break;
                            }
                        }
                    }
                }
                ';' => break Token::single_span(start, TokenKind::Semi),
                '+' => break Token::single_span(start, TokenKind::Plus),
                '-' => break Token::single_span(start, TokenKind::Minus),
                '*' => break Token::single_span(start, TokenKind::Asterisk),
                '/' => break Token::single_span(start, TokenKind::Slash),
                '%' => break Token::single_span(start, TokenKind::Percent),
                '{' => break Token::single_span(start, TokenKind::BraceO),
                '}' => break Token::single_span(start, TokenKind::BraceC),
                '[' => break Token::single_span(start, TokenKind::BracketO),
                ']' => break Token::single_span(start, TokenKind::BracketC),
                '(' => break Token::single_span(start, TokenKind::ParenO),
                ')' => break Token::single_span(start, TokenKind::ParenC),
                '.' => break Token::single_span(start, TokenKind::Dot),
                ',' => break Token::single_span(start, TokenKind::Comma),
                '=' => {
                    break self.maybe_next_char(
                        '=',
                        TokenKind::EqualEqual,
                        TokenKind::Equal,
                        start,
                    );
                }
                '!' => {
                    break if self.expect('=') {
                        let _ = self.code.next(); // consume =;
                        // `!` and `=` are one byte each, so the span ends (exclusively)
                        // two bytes after `start`
                        Token::new(Span::start_end(start, start + 2), TokenKind::BangEqual)
                    } else {
                        Token::new(
                            Span::single(start),
                            TokenKind::Error(Box::new(CompilerError::with_note(
                                Span::single(start),
                                "Expected '=' after '!'".to_string(),
                                "If you meant to use it for negation, use `not`".to_string(),
                            ))),
                        )
                    };
                }
                '>' => {
                    break self.maybe_next_char(
                        '=',
                        TokenKind::GreaterEqual,
                        TokenKind::Greater,
                        start,
                    );
                }
                '<' => {
                    break self.maybe_next_char('=', TokenKind::LessEqual, TokenKind::Less, start);
                }
                '"' => {
                    // the contents of the literal with the escaping backslashes removed
                    let mut buffer = String::new();
                    // whether the previous character was an unescaped backslash
                    let mut escaped = false;
                    let end = loop {
                        match self.code.next() {
                            Some((end, '"')) if !escaped => break end,
                            Some((_, '\\')) if !escaped => escaped = true,
                            Some((_, char)) => {
                                // an escaped character is taken over literally
                                escaped = false;
                                buffer.push(char);
                            }
                            None => {
                                return Some(Token::new(
                                    Span::single(start),
                                    TokenKind::Error(Box::new(CompilerError::with_note(
                                        Span::single(start), // do not show the whole literal, this does not make sense
                                        "String literal not closed".to_string(),
                                        "Close the literal using '\"'".to_string(),
                                    ))),
                                ));
                            }
                        }
                    };
                    // NOTE(review): `end` is the offset of the closing quote itself, while the
                    // other tokens pass an exclusive end - confirm whether the quote should be
                    // part of the span.
                    break Token::new(
                        Span::start_end(start, end),
                        TokenKind::String(self.rt_alloc.intern_string(&buffer)),
                    );
                }
                char => {
                    if char.is_ascii_digit() {
                        let mut num_buffer = String::from(char); // we need to ignore `_`
                        let mut had_dot = false;
                        let end = loop {
                            // peek here because the character signaling the end should not be consumed
                            match self.code.peek().copied() {
                                Some((_, '.')) if !had_dot => {
                                    let _ = self.code.next();
                                    num_buffer.push('.');
                                    had_dot = true;
                                }
                                Some((_, '_')) => {
                                    // digit separator, not part of the number
                                    let _ = self.code.next();
                                }
                                Some((_, next_char)) if next_char.is_ascii_digit() => {
                                    let _ = self.code.next();
                                    num_buffer.push(next_char);
                                }
                                Some((end, _)) => break end,
                                None => break self.src.len(), // reached EOF, so parse this number
                            }
                        };
                        let number_str = &num_buffer;
                        let span = Span::start_end(start, end);
                        let number = number_str.parse::<f64>();
                        break match number {
                            Ok(number) if number.is_infinite() => {
                                Token::new(span, TokenKind::Error(Box::new(CompilerError::with_note(
                                    span,
                                    "Number literal too long".to_string(),
                                    "A number literal cannot be larger than a 64 bit float can represent"
                                        .to_string(),
                                ))))
                            }
                            Ok(number) => Token::new(span, TokenKind::Number(number)),
                            Err(err) => Token::new(span, TokenKind::Error(Box::new(CompilerError::with_note(
                                span,
                                "Invalid number".to_string(),
                                err.to_string(),
                            )))),
                        };
                    } else if is_valid_ident_start(char) {
                        // it must be an identifier
                        let end = loop {
                            match self.code.peek() {
                                Some((_, char)) if is_valid_ident_part(*char) => {
                                    let _ = self.code.next(); // consume identifier part
                                }
                                Some((end, _)) => break *end,
                                None => break self.src.len(),
                            }
                        };
                        break Token::new(
                            Span::start_end(start, end),
                            self.keyword_or_ident(&self.src[start..end]),
                        );
                    } else {
                        break Token::new(
                            Span::single(start),
                            TokenKind::Error(Box::new(CompilerError::with_note(
                                Span::single(start),
                                format!("Unexpected character: '{}'", char),
                                "Character is not allowed outside of string literals and comments"
                                    .to_string(),
                            ))),
                        );
                    }
                }
            }
        };

        Some(token)
    }
}
/// Whether `char` may appear in an identifier after its first character: any alphanumeric
/// character or an underscore.
fn is_valid_ident_part(char: char) -> bool {
    matches!(char, '_') || char.is_alphanumeric()
}
/// Whether `char` may start an identifier: any alphabetic character or an underscore.
/// Digits are not allowed here.
fn is_valid_ident_start(char: char) -> bool {
    matches!(char, '_') || char.is_alphabetic()
}
#[cfg(test)]
mod test {
use crate::lex::Lexer;
use crate::RtAlloc;
type StdString = std::string::String;
/// Lexes `code` and compares the kinds of all tokens against the stored snapshot of the
/// calling test. Spans are not part of the snapshot.
fn lex_test(code: &str) {
    // SAFETY: we only work in this tiny scope
    let mut runtime = unsafe { RtAlloc::new() };

    let tokens: Vec<_> = Lexer::new(code, &mut runtime)
        .map(|token| token.kind)
        .collect();

    insta::assert_debug_snapshot!(tokens);
}
#[test]
fn smiley_face() {
lex_test(">>.<<");
}
#[test]
fn greater_than_less_than_equal() {
lex_test(">= <= == < < >=");
}
#[test]
fn no_no_no() {
lex_test("!= != = !=");
}
#[test]
fn braces_brackets_parens() {
lex_test("{([]]}");
}
#[test]
fn braces_brackets_parens_whitespace() {
lex_test(
"{ ( [ ] ]
}",
);
}
#[test]
fn fancy_stuff() {
lex_test(". ,- * -, .");
}
#[test]
fn comments() {
lex_test("fn # fn");
}
#[test]
fn long_multiline_comment() {
lex_test(
"fn ## hello i am something
i span multiple lines
will you love me? 🥺🥺🥺🥺🥺
pls :) o(**)
i like the indentation here ngl | sneak for -> ## for ## <- sneak for
## and",
);
}
#[test]
fn terminate_multiline_comment_correctly() {
lex_test(
"fn ## # no not here :( ## let # ## <- this is commented out
# so no multiline comment
##
here it starts
# let #
# # and
## or
",
);
}
#[test]
fn greeting() {
lex_test("-.- /%");
}
#[test]
fn countdown() {
lex_test("3 . . 2 . . 1 . . 0");
}
#[test]
fn underscore_number() {
lex_test("1_000_000");
}
#[test]
fn trailing_underscore_number() {
lex_test("1_00_");
}
#[test]
fn larger_numbers() {
lex_test("123456789, 123456789.1234, 64785903");
}
#[test]
fn string() {
lex_test(r#""uwu""#);
}
#[test]
fn strings() {
lex_test(r#"( "hi" "uwu" "\"uwu\"" "no \\ u" )"#);
}
#[test]
fn keywords() {
lex_test("let fn if else loop while break for true false null and not or print");
}
#[test]
fn keyword_and_ident() {
lex_test("let variable be a loop if false is true");
}
#[test]
fn not_quite_a_keyword() {
let words = [
"letter",
"fori",
"fnfn",
"iffy",
"bloop",
"loopy_yeah",
"whileTrue",
"truefalse",
"falsetrue",
"nullability",
"rot",
"ornot",
"nor",
"andnowQuestionMark",
"notOrAnd",
"breakMe",
"Ibreak",
];
let sentences = words
.iter()
.map(|word| format!("{} ", word))
.collect::<StdString>();
lex_test(&sentences);
}
#[test]
fn serious_program() {
lex_test(
r#"let string = "hallol"
let number = 5
let me out ._.
fn world() {
if number == 5 or true == false and not false {
println("Hello \\ World!")
}
}"#,
);
}
}

View file

@ -1,22 +1,12 @@
#![deny(clippy::disallowed_type)]
mod ast;
mod bytecode;
mod compile;
mod errors;
mod gc;
mod lex;
mod parse;
mod util;
mod vm;
use crate::ast::Program;
use crate::gc::RtAlloc;
use std::io::Write;
pub use bumpalo::Bump;
pub use lex::*;
pub use parse::*;
#[cfg(not(feature = "fxhash"))]
#[allow(clippy::disallowed_types)]
@ -32,86 +22,10 @@ type HashSet<T> = std::collections::HashSet<T>;
#[cfg(feature = "fxhash")]
type HashSet<T> = rustc_hash::FxHashSet<T>;
pub struct Config<'io> {
pub debug: bool,
pub step: bool,
pub stdout: &'io mut dyn Write,
/// Pretty-prints the parsed `ast` to stderr through `dbg`.
///
/// `program` (the source text) is accepted but not used by this implementation.
pub fn process_ast(program: &str, ast: &Program) {
dbg(ast);
}
/// Lexes and parses `program`, then hands the AST to `process_ast`.
///
/// When `cfg.debug` is set, the active configuration is echoed to stderr first.
/// A parse failure is reported through `errors::display_error` and nothing is executed.
pub fn run_program(program: &str, cfg: &mut Config) {
    if cfg.debug {
        eprintln!("Config: debug: {}, step: {}", cfg.debug, cfg.step);
    }

    let ast_alloc = Bump::new();

    // SAFETY: NOTE(review): the contract of `RtAlloc::new` is not visible from here;
    // the allocator is created once per run and moved into `process_ast` — confirm this upholds it.
    let mut runtime = unsafe { RtAlloc::new() };

    let parsed = parse::parse(lex::Lexer::new(program, &mut runtime), &ast_alloc);
    match parsed {
        Err(err) => errors::display_error(program, err),
        Ok(ast) => process_ast(program, &ast, runtime, cfg),
    }
}
/// Compiles `ast` to bytecode and executes it in the vm.
///
/// With `cfg.debug` set, the AST is dumped to stderr and the bytecode to stdout.
/// Compile errors go through `errors::display_error`; a vm error is printed to stderr.
fn process_ast(program: &str, ast: &Program, mut runtime: RtAlloc, cfg: &mut Config<'_>) {
    if cfg.debug {
        util::dbg("AST:\n", ast);
    }

    let bytecode_alloc = Bump::new();

    let code = match compile::compile(ast, &bytecode_alloc, &mut runtime) {
        Ok(code) => code,
        Err(err) => {
            errors::display_error(program, err);
            return;
        }
    };

    if cfg.debug {
        println!("Bytecode:\n{:#?}\n", code);
    }

    if let Err(result) = vm::execute(code, runtime, cfg) {
        eprintln!("error: {}", result);
    }
}
// Fuzz entry points live here rather than in the fuzzer itself: it is easier to find
// them when a change breaks them.
/// Lexes, parses and, if parsing succeeded, compiles `program`; every result is discarded.
#[doc(hidden)]
pub fn _fuzz_compile(program: &str) {
    // SAFETY: NOTE(review): the allocator never leaves this function; confirm that this
    // satisfies the contract of `RtAlloc::new`.
    let mut runtime = unsafe { RtAlloc::new() };
    let ast_alloc = Bump::new();

    let parsed = parse::parse(lex::Lexer::new(program, &mut runtime), &ast_alloc);
    if let Ok(ast) = parsed {
        let bytecode_alloc = Bump::new();
        let _bytecode = compile::compile(&ast, &bytecode_alloc, &mut runtime);
    }
}
/// Lexes and parses `program`, discarding the result (fuzzing entry point).
#[doc(hidden)]
pub fn _fuzz_parse(program: &str) {
    // SAFETY: NOTE(review): the allocator never leaves this function; confirm that this
    // satisfies the contract of `RtAlloc::new`.
    let mut runtime = unsafe { RtAlloc::new() };
    let ast_alloc = Bump::new();
    let _ast = parse::parse(lex::Lexer::new(program, &mut runtime), &ast_alloc);
}
#[doc(hidden)]
pub fn _fuzz_lex(program: &str) {
// SAFETY: Just this scope
let mut runtime = unsafe { RtAlloc::new() };
let lexer = lex::Lexer::new(program, &mut runtime);
for _token in lexer {}
/// Writes the `dbg_pls` pretty-printed form of `x` to stderr.
pub fn dbg(x: impl dbg_pls::DebugPls) {
    let rendered = dbg_pls::pretty(&x);
    eprintln!("{}", rendered)
}

View file

@ -1,682 +0,0 @@
//! The parser implementation.
//!
//! It's a handwritten recursive descent parser. It has an internal peekable iterator from where
//! it gets its next tokens. Only a lookahead of one is required.
#[cfg(test)]
mod test;
use crate::ast::*;
use crate::errors::{CompilerError, Span};
use crate::lex::{Token, TokenKind};
use bumpalo::collections::Vec;
use bumpalo::Bump;
use std::iter::Peekable;
/// State of the recursive descent parser; constructed by `parse` for a single run.
#[derive(Debug)]
struct Parser<'ast, I>
where
I: Iterator<Item = Token>,
{
/// Token source with one token of lookahead.
tokens: Peekable<I>,
/// Number of currently active parse functions; `enter_parse!` compares it to `MAX_DEPTH`.
depth: usize,
/// How many function bodies enclose the current position; `return` requires it to be > 0.
inside_fn_depth: usize,
/// How many loop bodies enclose the current position; `break` requires it to be > 0.
inside_loop_depth: usize,
/// Bump allocator that AST nodes and node lists are allocated into.
bump: &'ast Bump,
}
/// Parses a token stream into a `Program` whose nodes are allocated in `ast_bump`.
///
/// Returns the first `CompilerError` encountered; there is no error recovery.
pub fn parse<'lexer, 'ast>(
    tokens: impl Iterator<Item = Token> + 'lexer,
    ast_bump: &'ast Bump,
) -> Result<Program<'ast>, CompilerError> {
    let mut parser = Parser {
        bump: ast_bump,
        tokens: tokens.peekable(),
        depth: 0,
        inside_fn_depth: 0,
        inside_loop_depth: 0,
    };

    parser.program()
}
type ParseResult<T> = Result<T, CompilerError>;
// Builds a binary operation whose left operand `$lhs` has already been parsed.
//
// Consumes the operator token (assumed to be the peeked token), parses the right operand
// with `$self.$function()` and evaluates to `Ok(Expr::BinaryOp(..))` spanning both operands.
// A failure of the right-hand parse returns from the *calling* function through `?`.
macro_rules! parse_bin_op {
($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{
let _ = $self.next();
let rhs = $self.$function()?;
Ok(Expr::BinaryOp($self.bump.alloc(BinaryOp {
span: $lhs.span().extend(rhs.span()),
lhs: $lhs,
rhs,
kind: $kind,
})))
}};
}
// Counterpart of `enter_parse!`: decrements the parser's nesting depth when a parse
// function finishes. It only runs where it is written out, so a `?` or `return` placed
// before it leaves the depth incremented.
macro_rules! exit_parse {
($self: ident) => {
$self.depth -= 1;
};
}
// Marks entry into a parse function: increments the nesting depth and, once it exceeds
// `Self::MAX_DEPTH`, returns the error produced by `too_nested_error` from the *calling*
// function (via `?`). This bounds recursion on deeply nested input.
macro_rules! enter_parse {
($self: ident) => {
$self.depth += 1;
if $self.depth > Self::MAX_DEPTH {
let _ = $self.too_nested_error()?;
}
};
}
impl<'ast, I> Parser<'ast, I>
where
I: Iterator<Item = Token>,
{
const MAX_DEPTH: usize = 100;
/// Parses the whole token stream as a top-level statement list.
///
/// The resulting block carries a dummy span.
fn program(&mut self) -> ParseResult<Program<'ast>> {
    let stmts = self.statement_list()?;
    Ok(Block {
        stmts,
        span: Span::dummy(),
    })
}
/// Always fails: builds the error reported when `MAX_DEPTH` is exceeded.
///
/// Consumes the next token to anchor the error at its span; at end of input an EOF
/// error is produced instead.
fn too_nested_error(&mut self) -> ParseResult<()> {
    let error = if let Some(token) = self.next() {
        CompilerError::new(token.span, "reached maximal nesting depth".to_string())
    } else {
        CompilerError::eof("reached EOF while being nested to deeply")
    };
    Err(error)
}
/// Parses statements until a closing brace or the end of input is the next token.
///
/// The terminating `}` is not consumed. The statements are returned as a bump-allocated slice.
fn statement_list(&mut self) -> ParseResult<&'ast [Stmt<'ast>]> {
    enter_parse!(self);
    let mut collected = Vec::new_in(self.bump);

    while !matches!(self.peek_kind(), Some(TokenKind::BraceC) | None) {
        let stmt = self.statement()?;
        collected.push(stmt);
    }

    exit_parse!(self);
    Ok(collected.into_bump_slice())
}
/// Parses `{ statement* }` into a `Block` spanning both braces.
fn block(&mut self) -> ParseResult<Block<'ast>> {
    enter_parse!(self);
    let open = self.expect(TokenKind::BraceO)?;
    let stmts = self.statement_list()?;
    let close = self.expect(TokenKind::BraceC)?;
    exit_parse!(self);

    let span = open.span.extend(close.span);
    Ok(Block { stmts, span })
}
/// Parses a single statement, dispatching on the next token.
///
/// Anything that does not start with a statement keyword or `{` is parsed as an
/// assignment or expression statement. Fails with an EOF error when no token is left.
fn statement(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);

    let parsed = match *self
        .peek_kind()
        .ok_or_else(|| CompilerError::eof("statement"))?
    {
        TokenKind::BraceO => Ok(Stmt::Block(self.block()?)),
        TokenKind::If => Ok(Stmt::If(self.if_stmt()?)),
        TokenKind::Let => self.declaration(),
        TokenKind::Fn => self.fn_decl(),
        TokenKind::Loop => self.loop_stmt(),
        TokenKind::While => self.while_stmt(),
        TokenKind::Break => self.break_stmt(),
        TokenKind::Return => self.return_stmt(),
        TokenKind::Print => self.print_stmt(),
        _ => Ok(self.assignment()?),
    };

    exit_parse!(self);
    parsed
}
/// Parses `let <ident> = <expression>;`.
///
/// The statement span runs from the `let` keyword to the end of the initializer.
fn declaration(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let let_token = self.expect(TokenKind::Let)?;
    let name = self.ident()?;
    self.expect(TokenKind::Equal)?;
    let init = self.expression()?;
    self.expect(TokenKind::Semi)?;
    exit_parse!(self);

    let span = let_token.span.extend(init.span());
    Ok(Stmt::Declaration(Declaration { span, name, init }))
}
/// Parses `fn <ident>(<params>) { <body> }`.
///
/// While the body is parsed `inside_fn_depth` is raised so that `return` is accepted.
/// The loop depth is reset to zero for the duration of the body and restored afterwards:
/// a `break` inside a function must refer to a loop of that function, even when the
/// function declaration itself sits inside a loop.
fn fn_decl(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::Fn)?.span;
    let name = self.ident()?;
    let args = self.fn_args()?;

    // Loops surrounding the declaration must not make `break` valid inside the body.
    let enclosing_loop_depth = std::mem::replace(&mut self.inside_loop_depth, 0);
    self.inside_fn_depth += 1;
    let body = self.block()?;
    self.inside_fn_depth -= 1;
    self.inside_loop_depth = enclosing_loop_depth;

    exit_parse!(self);
    Ok(Stmt::FnDecl(FnDecl {
        span: keyword_span.extend(body.span),
        name,
        params: args,
        body,
    }))
}
/// Parses a parenthesized, comma separated parameter list: `(a, b, c)`.
fn fn_args(&mut self) -> ParseResult<&'ast [Ident]> {
    enter_parse!(self);
    let _ = self.expect(TokenKind::ParenO)?;
    let params = self.parse_list(TokenKind::ParenC, |parser| parser.ident())?;
    let _ = self.expect(TokenKind::ParenC)?;
    exit_parse!(self);
    Ok(params)
}
/// Parses `if <expression> { .. }` followed by an optional `else` part.
///
/// The else part, when present, is moved into the bump allocator and extends the span.
fn if_stmt(&mut self) -> ParseResult<IfStmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::If)?.span;
    let cond = self.expression()?;
    let body = self.block()?;

    let else_part = match self.peek_kind() {
        Some(TokenKind::Else) => Some(self.else_part()?),
        _ => None,
    };
    exit_parse!(self);

    let else_span = else_part.as_ref().map(ElsePart::span);
    let span = keyword_span.extend(body.span).option_extend(else_span);
    let else_part = else_part.map(|part| &*self.bump.alloc(part));

    Ok(IfStmt {
        span,
        cond,
        body,
        else_part,
    })
}
/// Parses `else if ..` or `else { .. }`; the span starts at the `else` keyword.
fn else_part(&mut self) -> ParseResult<ElsePart<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::Else)?.span;

    let is_else_if = matches!(self.peek_kind(), Some(TokenKind::If));
    let parsed = if is_else_if {
        let nested = self.if_stmt()?;
        let span = keyword_span.extend(nested.span);
        ElsePart::ElseIf(nested, span)
    } else {
        let block = self.block()?;
        let span = keyword_span.extend(block.span);
        ElsePart::Else(block, span)
    };

    exit_parse!(self);
    Ok(parsed)
}
/// Parses `loop { .. }`; `break` is permitted while the body is being parsed.
fn loop_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::Loop)?.span;

    self.inside_loop_depth += 1;
    let body = self.block()?;
    self.inside_loop_depth -= 1;

    exit_parse!(self);
    let body_span = keyword_span.extend(body.span);
    Ok(Stmt::Loop(body, keyword_span.extend(body_span)))
}
/// Parses `while <expression> { .. }`; `break` is permitted inside the body only,
/// not inside the condition.
fn while_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::While)?.span;
    let cond = self.expression()?;

    self.inside_loop_depth += 1;
    let body = self.block()?;
    self.inside_loop_depth -= 1;

    exit_parse!(self);
    let span = keyword_span.extend(body.span);
    Ok(Stmt::While(WhileStmt { span, cond, body }))
}
/// Parses `break;` and rejects it when no loop encloses the statement.
fn break_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::Break)?.span;
    let semi_span = self.expect(TokenKind::Semi)?.span;
    exit_parse!(self);

    let span = keyword_span.extend(semi_span);
    if self.inside_loop_depth > 0 {
        Ok(Stmt::Break(span))
    } else {
        Err(CompilerError::new(
            span,
            "break used outside of loop".to_string(),
        ))
    }
}
/// Parses `return;` or `return <expression>;` and rejects it outside of a function body.
fn return_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword_span = self.expect(TokenKind::Return)?.span;

    // a directly following `;` means there is no return value
    let value = match self.peek_kind() {
        Some(TokenKind::Semi) => None,
        _ => Some(self.expression()?),
    };

    let semi_span = self.expect(TokenKind::Semi)?.span;
    exit_parse!(self);

    let span = keyword_span.extend(semi_span);
    if self.inside_fn_depth > 0 {
        Ok(Stmt::Return(value, span))
    } else {
        Err(CompilerError::new(
            span,
            "return used outside of function".to_string(),
        ))
    }
}
/// Parses `print <expression>;`; the span covers the keyword through the semicolon.
fn print_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let keyword = self.expect(TokenKind::Print)?;
    let value = self.expression()?;
    let semi = self.expect(TokenKind::Semi)?;
    exit_parse!(self);

    Ok(Stmt::Print(value, keyword.span.extend(semi.span)))
}
/// Parses either `<expression> = <expression>;` (assignment) or `<expression>;`
/// (expression statement), decided by whether `=` follows the first expression.
fn assignment(&mut self) -> ParseResult<Stmt<'ast>> {
    enter_parse!(self);
    let lhs = self.expression()?;

    let stmt = match self.peek_kind() {
        Some(TokenKind::Equal) => {
            let _ = self.expect(TokenKind::Equal)?;
            let rhs = self.expression()?;
            let semi_span = self.expect(TokenKind::Semi)?.span;
            Stmt::Assignment(Assignment {
                span: lhs.span().extend(semi_span),
                lhs,
                rhs,
            })
        }
        _ => {
            let _ = self.expect(TokenKind::Semi)?;
            Stmt::Expr(lhs)
        }
    };

    exit_parse!(self);
    Ok(stmt)
}
/// Entry point of the expression grammar; starts at the lowest precedence level (`or`).
fn expression(&mut self) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let parsed = self.logical_or();
    exit_parse!(self);
    parsed
}
/// Parses `<logical_and> (or <logical_or>)?`.
fn logical_or(&mut self) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let lhs = self.logical_and()?;

    let parsed = if let Some(TokenKind::Or) = self.peek_kind() {
        parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_or)
    } else {
        Ok(lhs)
    };

    exit_parse!(self);
    parsed
}
/// Parses `<equality> (and <logical_and>)?`.
fn logical_and(&mut self) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let lhs = self.equality()?;

    let parsed = if let Some(TokenKind::And) = self.peek_kind() {
        parse_bin_op!(self, lhs, BinaryOpKind::And, logical_and)
    } else {
        Ok(lhs)
    };

    exit_parse!(self);
    parsed
}
fn equality(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self);
let lhs = self.comparison()?;
let return_expr = match self.peek_kind() {
Some(TokenKind::BangEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison)
}
Some(TokenKind::EqualEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison)
}
_ => Ok(lhs),
};
exit_parse!(self);
return_expr
}
fn comparison(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self);
let lhs = self.term()?;
let return_expr = match self.peek_kind() {
Some(TokenKind::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term),
Some(TokenKind::GreaterEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term)
}
Some(TokenKind::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term),
Some(TokenKind::LessEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term)
}
_ => Ok(lhs),
};
exit_parse!(self);
return_expr
}
fn term(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self);
let lhs = self.factor()?;
let return_expr = match self.peek_kind() {
Some(TokenKind::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, term),
Some(TokenKind::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, term),
_ => Ok(lhs),
};
exit_parse!(self);
return_expr
}
fn factor(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self);
let lhs = self.unary()?;
let return_expr = match self.peek_kind() {
Some(TokenKind::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, factor),
Some(TokenKind::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, factor),
Some(TokenKind::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, factor),
_ => Ok(lhs),
};
exit_parse!(self);
return_expr
}
/// Parses an optional single prefix operator (`not` or `-`) followed by a call expression.
///
/// The operand is parsed with `call`, so prefix operators cannot be stacked.
fn unary(&mut self) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);

    let operator = match self.peek_kind() {
        Some(TokenKind::Not) => Some(UnaryOpKind::Not),
        Some(TokenKind::Minus) => Some(UnaryOpKind::Neg),
        _ => None,
    };

    let parsed = match operator {
        Some(kind) => {
            // the operator token was just peeked, so it must exist
            let unary_op_span = self.next().unwrap().span;
            let expr = self.call()?;
            Ok(Expr::UnaryOp(self.bump.alloc(UnaryOp {
                span: unary_op_span.extend(expr.span()),
                expr,
                kind,
            })))
        }
        None => self.call(),
    };

    exit_parse!(self);
    parsed
}
/// Parses a primary expression followed by any number of postfix operations:
/// argument lists `(..)` and field accesses `.name`.
///
/// Each postfix operation wraps the expression parsed so far as its callee.
fn call(&mut self) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let mut callee = self.primary()?;

    loop {
        let (span, kind) = match self.peek_kind() {
            Some(TokenKind::ParenO) => {
                let open_span = self.expect(TokenKind::ParenO)?.span;
                let args = self.parse_list(TokenKind::ParenC, Self::expression)?;
                let close_span = self.expect(TokenKind::ParenC)?.span;
                (open_span.extend(close_span), CallKind::Fn(args))
            }
            Some(TokenKind::Dot) => {
                let dot_span = self.expect(TokenKind::Dot)?.span;
                let field = self.ident()?;
                (dot_span.extend(field.span), CallKind::Field(field))
            }
            _ => break,
        };

        callee = Expr::Call(self.bump.alloc(Call { callee, span, kind }));
    }

    exit_parse!(self);
    Ok(callee)
}
fn primary(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self);
let next = self.next().ok_or_else(|| CompilerError::eof("primary"))?;
let return_expr = match next.kind {
TokenKind::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))),
TokenKind::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))),
TokenKind::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))),
TokenKind::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))),
TokenKind::Null => Ok(Expr::Literal(Literal::Null(next.span))),
TokenKind::BraceO => self.object_literal(next.span),
TokenKind::BracketO => self.array_literal(next.span),
TokenKind::ParenO => {
let expr = self.expression()?;
let _ = self.expect(TokenKind::ParenC)?;
Ok(expr)
}
TokenKind::Ident(sym) => Ok(Expr::Ident(Ident {
sym,
span: next.span,
})),
TokenKind::Error(error) => Err(*error),
_ => Err(CompilerError::new(
next.span,
format!("invalid token in expression: `{:?}`", next.kind),
)),
};
exit_parse!(self);
return_expr
}
/// Consumes the next token and requires it to be an identifier.
///
/// A lexer error token is surfaced as its contained error.
fn ident(&mut self) -> ParseResult<Ident> {
    enter_parse!(self);
    let token = self
        .next()
        .ok_or_else(|| CompilerError::eof("identifier"))?;
    let Token { kind, span } = token;

    let parsed = match kind {
        TokenKind::Ident(sym) => Ok(Ident { sym, span }),
        TokenKind::Error(error) => Err(*error),
        other => {
            return Err(CompilerError::new(
                span,
                format!("expected identifier, received `{:?}`", other),
            ))
        }
    };

    exit_parse!(self);
    parsed
}
/// Parses the remainder of an object literal after its opening brace at `open_span`.
///
/// Only the empty literal `{}` is accepted: the closing brace must follow immediately.
fn object_literal(&mut self, open_span: Span) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let closing = self.expect(TokenKind::BraceC)?;
    exit_parse!(self);

    let span = open_span.extend(closing.span);
    Ok(Expr::Literal(Literal::Object(span)))
}
/// Parses the remainder of an array literal after its opening bracket at `open_span`:
/// a comma separated expression list followed by `]`.
fn array_literal(&mut self, open_span: Span) -> ParseResult<Expr<'ast>> {
    enter_parse!(self);
    let elements = self.parse_list(TokenKind::BracketC, Self::expression)?;
    let closing_bracket = self.expect(TokenKind::BracketC)?;
    exit_parse!(self);

    let span = open_span.extend(closing_bracket.span);
    Ok(Expr::Literal(Literal::Array(elements, span)))
}
/// Parses a comma separated list of items up to, but not including, the `close` token.
///
/// Each item is parsed by `parser`. The list may be empty and may end with a trailing
/// comma. The closing token is left for the caller to consume. Reaching the end of input
/// before `close` is an error.
///
/// Every successful path leaves the nesting depth balanced, including the empty list;
/// otherwise each empty list (such as a call without arguments) would permanently
/// consume one level of the `MAX_DEPTH` budget.
fn parse_list<T, F>(&mut self, close: TokenKind, mut parser: F) -> ParseResult<&'ast [T]>
where
    F: FnMut(&mut Self) -> ParseResult<T>,
{
    enter_parse!(self);
    let mut elements = Vec::new_in(self.bump);

    if self.peek_kind() != Some(&close) {
        let first = parser(self)?;
        elements.push(first);

        let reached_eof = || {
            CompilerError::new(
                Span::dummy(),
                format!("reached EOF expecting `{:?}`", close),
            )
        };

        while self.peek_kind().ok_or_else(reached_eof)? != &close {
            self.expect(TokenKind::Comma)?;

            // trailing comma support
            if self.peek_kind() == Some(&close) {
                break;
            }

            let item = parser(self)?;
            elements.push(item);
        }
    }

    exit_parse!(self);
    Ok(elements.into_bump_slice())
}
// token helpers
/// Consumes and returns the next token, or `None` when the token stream is exhausted.
fn next(&mut self) -> Option<Token> {
self.tokens.next()
}
/// Returns a reference to the next token without consuming it.
fn peek(&mut self) -> Option<&Token> {
self.tokens.peek()
}
/// Returns the kind of the next token without consuming it.
fn peek_kind(&mut self) -> Option<&TokenKind> {
    let token = self.peek()?;
    Some(&token.kind)
}
fn expect(&mut self, kind: TokenKind) -> ParseResult<Token> {
if let Some(token) = self.next() {
if token.kind == kind {
Ok(token)
} else if let TokenKind::Error(err) = token.kind {
Err(*err)
} else {
Err(CompilerError::new(
token.span,
format!("expected `{:?}`, received `{:?}`", kind, token.kind),
))
}
} else {
Err(CompilerError::new(
Span::dummy(),
format!("reached EOF expecting `{:?}`", kind),
))
}
}
}
impl CompilerError {
    /// Creates the error reported when the input ends while `message` was being parsed.
    ///
    /// The error has no note and carries a dummy span.
    fn eof(message: &str) -> Self {
        let message = format!("reached EOF while parsing `{}`", message);
        Self {
            // todo: use a real span instead of the dummy one
            span: Span::dummy(),
            message,
            note: None,
        }
    }
}

View file

@ -1,20 +0,0 @@
/// Statically asserts that the size of the type is at most `$size` bytes (on 64-bit).
///
/// Written as `assert_size!(Type <= BYTES)`. Compilation fails when the type is larger
/// than the bound; a type that is smaller or exactly as large is accepted. The check is
/// only performed on targets with 64-bit pointers.
macro_rules! assert_size {
    ($name:ident <= $size:expr) => {
        #[cfg(target_pointer_width = "64")]
        const _: () = assert!(
            ::std::mem::size_of::<$name>() <= $size,
            "type is larger than the asserted size bound"
        );
    };
}
pub(crate) use assert_size;
use std::fmt::Display;
/// Prints `prefix` followed by the `dbg_pls` pretty-printed form of `x` to stderr.
#[cfg(feature = "_debug")]
pub fn dbg(prefix: impl Display, x: impl dbg_pls::DebugPls) {
    let rendered = dbg_pls::pretty(&x);
    eprintln!("{prefix}{rendered}")
}
/// Prints `prefix` followed by the pretty `Debug` representation of `x` to stderr.
#[cfg(not(feature = "_debug"))]
pub fn dbg(prefix: impl Display, x: impl std::fmt::Debug) {
    eprintln!("{}{:#?}", prefix, x);
}

312
src/vm.rs
View file

@ -1,312 +0,0 @@
use crate::bytecode::{FnBlock, Function, Instr};
use crate::gc::{Object, RtAlloc, Symbol};
use crate::util;
use crate::Config;
use std::fmt::{Debug, Display, Formatter};
use std::io::{Read, Write};
type VmError = Box<&'static str>;
type VmResult = Result<(), VmError>;
// never get bigger than a machine word.
util::assert_size!(VmResult <= std::mem::size_of::<usize>());
/// Runs `bytecode` in a fresh vm, starting with the first function block.
///
/// Output of the program is written to `cfg.stdout`; `cfg.step` enables interactive
/// single stepping. Fails with `"no bytecode found"` when `bytecode` is empty.
pub fn execute<'bc>(
    bytecode: &'bc [FnBlock<'bc>],
    alloc: RtAlloc,
    cfg: &mut Config,
) -> Result<(), VmError> {
    let entry = bytecode.first().ok_or("no bytecode found")?;

    let mut vm = Vm {
        blocks: bytecode,
        current: entry,
        current_block_index: 0,
        stack_offset: 0,
        pc: 0,
        stack: Vec::with_capacity(1024 << 5),
        _alloc: alloc,
        stdout: cfg.stdout,
        step: cfg.step,
    };

    vm.execute_function()
}
/// A runtime value as stored on the vm stack.
///
/// The type is `Copy`; its size is checked by the `assert_size!` invocation below.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "_debug", derive(dbg_pls::DebugPls))]
pub enum Value {
/// `null`
Null,
/// A boolean value
Bool(bool),
/// A floating point number
Num(f64),
/// An interned string
String(Symbol),
/// An array of values (the variant currently carries no data)
Array,
/// A map from string to value
Object(Object),
/// A first-class function object
Function(Function),
/// A value that is stored by the vm for bookkeeping and should never be accessed for anything else
NativeU(usize),
}
util::assert_size!(Value <= 24);
const TRUE: Value = Value::Bool(true);
const FALSE: Value = Value::Bool(false);
/// The interpreter state: the loaded bytecode, the value stack and the registers
/// describing the function that is currently executing.
struct Vm<'bc, 'io> {
// -- global
/// All function blocks of the program; `Value::Function` values index into this slice.
blocks: &'bc [FnBlock<'bc>],
/// The runtime allocator; held but not read by the code in this file.
_alloc: RtAlloc,
/// The value stack shared by all function frames.
stack: Vec<Value>,
/// Destination of `Instr::Print` output.
stdout: &'io mut dyn Write,
/// When set, every instruction waits for input on stdin before it is dispatched.
step: bool,
// -- local to the current function
/// The current function
current: &'bc FnBlock<'bc>,
/// Index of `current` inside `blocks`.
current_block_index: usize,
/// The offset of the first parameter of the current function
stack_offset: usize,
/// Index of the next instruction being executed. is out of bounds if the current
/// instruction is the last one
pc: usize,
}
impl<'bc> Vm<'bc, '_> {
/// The main interpreter loop: fetches the instruction at `pc`, advances `pc` and
/// dispatches the instruction, until `pc` runs past the end of the current block.
///
/// In debug builds the stack height is checked against the recorded `stack_sizes`
/// entry after every instruction.
fn execute_function(&mut self) -> VmResult {
    loop {
        let fetched = self.current.code.get(self.pc).copied();
        // `pc` always points at the *next* instruction while one is being dispatched
        self.pc += 1;

        if let Some(instr) = fetched {
            self.dispatch_instr(instr)?;
        } else {
            return Ok(());
        }

        // a call resets `pc` to 0; there is no previous instruction to compare against then
        if self.pc > 0 {
            debug_assert_eq!(self.current.stack_sizes[self.pc - 1], self.stack.len());
        }
    }
}
/// Executes a single instruction against the vm state.
///
/// The caller has already advanced `pc` past `instr`, so relative jumps are applied to
/// the index of the following instruction. Operands are popped from the stack with
/// `unwrap`, i.e. an empty stack panics. Operations on operands of an unsupported type
/// fail with a `"bad type"` error.
fn dispatch_instr(&mut self, instr: Instr) -> VmResult {
if self.step {
self.step_debug();
}
match instr {
Instr::Nop => {}
// pops a value and writes it into the local slot `index` of the current frame
Instr::Store(index) => {
let val = self.stack.pop().unwrap();
self.stack[self.stack_offset + index] = val;
}
// copies the local slot `index` of the current frame onto the top of the stack
Instr::Load(index) => self.stack.push(self.stack[self.stack_offset + index]),
Instr::PushVal(value) => self.stack.push(value),
// serves both negations: logical for booleans, arithmetic for numbers
Instr::Neg => {
let val = self.stack.pop().unwrap();
match val {
Value::Bool(bool) => self.stack.push(Value::Bool(!bool)),
Value::Num(float) => self.stack.push(Value::Num(-float)),
_ => return Err(self.type_error()),
}
}
// -- arithmetic: numbers only
Instr::BinAdd => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a + b)),
_ => Err("bad type".into()),
})?,
Instr::BinSub => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a - b)),
_ => Err("bad type".into()),
})?,
Instr::BinMul => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a * b)),
_ => Err("bad type".into()),
})?,
Instr::BinDiv => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a / b)),
_ => Err("bad type".into()),
})?,
Instr::BinMod => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Num(a % b)),
_ => Err("bad type".into()),
})?,
// -- logic: booleans only; both operands are already evaluated at this point
Instr::BinAnd => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a && b)),
_ => Err("bad type".into()),
})?,
Instr::BinOr => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Bool(a), Value::Bool(b)) => Ok(Value::Bool(a || b)),
_ => Err("bad type".into()),
})?,
// -- ordering: two numbers, or two strings compared by their text
Instr::CmpGreater => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a > b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a.as_str() > b.as_str())),
_ => Err("bad type".into()),
})?,
Instr::CmpGreaterEq => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a >= b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a.as_str() >= b.as_str())),
_ => Err("bad type".into()),
})?,
Instr::CmpLess => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a < b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a.as_str() < b.as_str())),
_ => Err("bad type".into()),
})?,
Instr::CmpLessEq => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a <= b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a.as_str() <= b.as_str())),
_ => Err("bad type".into()),
})?,
// -- equality: operands of differing variants, and pairs not listed here
// (e.g. two booleans), are a type error rather than "not equal"
Instr::CmpEq => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Null, Value::Null) => Ok(TRUE),
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a == b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a == b)),
(Value::Object(_a), Value::Object(_b)) => todo!(),
(Value::Array, Value::Array) => Ok(TRUE),
_ => Err("bad type".into()),
})?,
Instr::CmpNotEq => self.bin_op(|lhs, rhs| match (lhs, rhs) {
(Value::Null, Value::Null) => Ok(FALSE),
(Value::Num(a), Value::Num(b)) => Ok(Value::Bool(a != b)),
(Value::String(a), Value::String(b)) => Ok(Value::Bool(a != b)),
(Value::Object(_a), Value::Object(_b)) => todo!(),
(Value::Array, Value::Array) => Ok(FALSE),
_ => Err("bad type".into()),
})?,
// pops a value and writes its `Display` form plus a newline to the configured stdout
Instr::Print => {
let val = self.stack.pop().unwrap();
writeln!(self.stdout, "{}", val).map_err(|_| "failed to write to stdout")?;
}
// pops the condition; on `false` jumps by `pos` relative to the next instruction
Instr::JmpFalse(pos) => {
let val = self.stack.pop().unwrap();
match val {
Value::Bool(false) => self.pc = (self.pc as isize + pos) as usize,
Value::Bool(true) => {}
_ => return Err("bad type".into()),
}
}
// unconditional jump by `pos` relative to the next instruction
Instr::Jmp(pos) => self.pc = (self.pc as isize + pos) as usize,
Instr::Call => self.call()?,
// NOTE(review): this only leaves `dispatch_instr`; `ret` is not invoked here, so the
// caller's frame is not restored — confirm whether that is intended.
Instr::Return => return Ok(()),
// drops the topmost `size` values from the stack
Instr::ShrinkStack(size) => {
assert!(self.stack.len() >= size);
let new_len = self.stack.len() - size;
// SAFETY: We only ever shrink the vec, and we don't overflow. Value is copy so no leaks as a bonus
unsafe { self.stack.set_len(new_len) }
}
}
Ok(())
}
/// Pops two operands, applies `f(lhs, rhs)` and pushes the result.
///
/// The right operand is on top of the stack, so it is popped first. When `f` fails,
/// the error is returned and nothing is pushed.
fn bin_op<F>(&mut self, f: F) -> VmResult
where
    F: FnOnce(Value, Value) -> Result<Value, VmError>,
{
    let rhs = self.stack.pop().unwrap();
    let lhs = self.stack.pop().unwrap();

    f(lhs, rhs).map(|value| self.stack.push(value))
}
/// Enters the function whose value is on top of the stack.
///
/// Pops the callee, makes the current stack height the new frame offset and pushes three
/// bookkeeping values: the caller's stack offset, the return `pc` and the caller's block
/// index. Execution continues at instruction 0 of the callee.
fn call(&mut self) -> VmResult {
    let caller_offset = self.stack_offset;
    let caller_block = self.current_block_index;

    let callee = self.stack.pop().unwrap().unwrap_function();
    let callee_block = &self.blocks[callee];

    self.stack_offset = self.stack.len();

    // bookkeeping values, in the order `ret` reads them back
    self.stack.push(Value::NativeU(caller_offset));
    self.stack.push(Value::NativeU(self.pc));
    self.stack.push(Value::Function(caller_block));

    self.current_block_index = callee;
    self.current = callee_block;
    self.pc = 0;

    // we are now set up correctly, let the next instruction run
    Ok(())
}
/// Restores the caller's registers from the three bookkeeping slots that are read at
/// `stack_offset + arity` of the current function: stack offset, `pc` and block index.
///
/// The stack itself is not shrunk here.
fn ret(&mut self) -> VmResult {
    let arity: usize = self.current.arity.try_into().unwrap();
    let frame = self.stack_offset + arity;

    let caller_offset = self.stack[frame].unwrap_native_int();
    let caller_pc = self.stack[frame + 1].unwrap_native_int();
    let caller_block = self.stack[frame + 2].unwrap_function();

    self.stack_offset = caller_offset;
    self.pc = caller_pc;
    self.current_block_index = caller_block;
    self.current = &self.blocks[caller_block];

    Ok(())
}
/// Creates the generic error for an operand of an unsupported type.
fn type_error(&self) -> VmError {
    Box::new("bad type")
}
/// Single-step helper: prints the instruction that is about to be dispatched together
/// with the current and the expected stack size, then blocks until input arrives on stdin.
///
/// `pc` has already been advanced past the instruction being dispatched, so that
/// instruction (and its `stack_sizes` entry) lives at `pc - 1`. Indexing with `pc` itself
/// would show the following instruction and run out of bounds on the last one.
fn step_debug(&self) {
    // `saturating_sub` only guards against a `pc` of 0, which the interpreter loop never produces here
    let index = self.pc.saturating_sub(1);
    let current_instr = &self.current.code[index];
    let curr_stack_size = self.stack.len();
    let expected_stack_size = &self.current.stack_sizes[index];

    eprintln!(
        "Current Instruction: {:?}\nCurrent Stack size: {}\nExpected Stack size after instruction: {}",
        current_instr, curr_stack_size, expected_stack_size
    );

    // wait for the user; the content and any read error are irrelevant
    let mut buf = [0; 64];
    let _ = std::io::stdin().read(&mut buf);
}
}
impl Value {
    /// Extracts the `usize` of a `NativeU` value.
    ///
    /// Any other variant indicates a vm bug and panics.
    fn unwrap_native_int(&self) -> usize {
        match self {
            Value::NativeU(n) => *n,
            other => unreachable!("expected native int, got {:?}", other),
        }
    }

    /// Extracts the `Function` of a `Function` value.
    ///
    /// Any other variant indicates a vm bug and panics.
    fn unwrap_function(&self) -> Function {
        match self {
            Value::Function(fun) => *fun,
            other => unreachable!("expected function, got {:?}", other),
        }
    }
}
/// User-facing formatting, as used by `Instr::Print`.
///
/// Arrays and objects are not implemented yet; formatting a bookkeeping value panics.
impl Display for Value {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Bool(flag) => Display::fmt(flag, f),
            Value::Num(number) => Display::fmt(number, f),
            Value::String(symbol) => f.write_str(symbol.as_str()),
            Value::Null => f.write_str("null"),
            Value::Function(_) => f.write_str("[function]"),
            Value::Array | Value::Object(_) => todo!(),
            Value::NativeU(_) => panic!("Called display on native value!"),
        }
    }
}