From e26e849b5642ecaa18fc64411fafee1446a3ac30 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Thu, 30 Dec 2021 13:19:52 +0100 Subject: [PATCH] bump alloc --- Cargo.lock | 7 ++ Cargo.toml | 1 + clippy.toml | 10 +- src/ast.rs | 142 +++++++++++++------------- src/bytecode.rs | 18 ++-- src/compile.rs | 40 +++++--- src/lex.rs | 4 +- src/lib.rs | 47 ++++----- src/parse.rs | 258 +++++++++++++++++++++++++++++------------------- 9 files changed, 302 insertions(+), 225 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 93945d8..40e1748 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "bumpalo" +version = "3.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" + [[package]] name = "console" version = "0.14.1" @@ -25,6 +31,7 @@ dependencies = [ name = "dilaria" version = "0.1.0" dependencies = [ + "bumpalo", "insta", "rustc-hash", ] diff --git a/Cargo.toml b/Cargo.toml index ea2ffbb..c4fd474 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bumpalo = { version = "3.8.0", features = ["collections", "boxed"] } rustc-hash = { version = "1.1.0", optional = true } diff --git a/clippy.toml b/clippy.toml index faa64c0..7c0ab05 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,2 +1,8 @@ -# we want to use our custom type from `values.rs`, so that consumers can choose between which HashMap they want -disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] +disallowed-types = [ + # we want to use our custom HashMap/HashSet from `values.rs`, so that consumers can choose between which HashMap they want + "std::collections::HashMap", + "std::collections::HashSet", + # we want to use bumpalo::collections::Vec, this can be removed later I guess + "std::collections::Vec", + "std::boxed::Box", +] diff --git a/src/ast.rs b/src/ast.rs index ab65863..6b69d47 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3,74 +3,76 @@ use crate::errors::Span; use crate::value::Symbol; +use bumpalo::boxed::Box; +use bumpalo::collections::Vec; -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq, Hash)] pub struct Ident { pub sym: Symbol, pub span: Span, } -#[derive(Debug, Clone, PartialEq)] -pub struct Program(pub Vec); +#[derive(Debug, PartialEq)] +pub struct Program<'ast>(pub Vec<'ast, Stmt<'ast>>); -#[derive(Debug, Clone, PartialEq)] -pub struct Block { - pub stmts: Vec, +#[derive(Debug, PartialEq)] +pub struct Block<'ast> { + pub stmts: Vec<'ast, Stmt<'ast>>, pub span: Span, } -#[derive(Debug, Clone, PartialEq)] -pub enum Stmt { - Declaration(Declaration), - Assignment(Assignment), - FnDecl(FnDecl), - If(IfStmt), - Loop(Block, Span), - While(WhileStmt), +#[derive(Debug, PartialEq)] +pub enum Stmt<'ast> { + Declaration(Declaration<'ast>), + Assignment(Assignment<'ast>), + FnDecl(FnDecl<'ast>), + If(IfStmt<'ast>), + Loop(Block<'ast>, Span), + While(WhileStmt<'ast>), Break(Span), - Return(Option, Span), - Block(Block), - Expr(Expr), - Print(Expr, Span), + Return(Option>, Span), + Block(Block<'ast>), + Expr(Expr<'ast>), + Print(Expr<'ast>, Span), } -#[derive(Debug, Clone, PartialEq)] -pub struct Declaration { +#[derive(Debug, PartialEq)] +pub struct Declaration<'ast> { pub span: Span, pub name: Ident, - pub init: Expr, + pub init: Expr<'ast>, } -#[derive(Debug, Clone, PartialEq)] -pub struct Assignment { +#[derive(Debug, PartialEq)] +pub struct Assignment<'ast> { pub span: Span, - pub lhs: Expr, - pub rhs: Expr, + pub lhs: Expr<'ast>, + pub rhs: Expr<'ast>, } -#[derive(Debug, Clone, PartialEq)] -pub struct FnDecl { +#[derive(Debug, PartialEq)] +pub struct FnDecl<'ast> { pub span: Span, pub name: Ident, - pub params: Vec, - pub body: Block, + pub params: Vec<'ast, Ident>, + pub body: Block<'ast>, } -#[derive(Debug, Clone, PartialEq)] -pub struct IfStmt { +#[derive(Debug, PartialEq)] +pub struct IfStmt<'ast> { pub span: Span, - pub cond: Expr, - pub body: Block, - pub else_part: Option>, + pub cond: Expr<'ast>, + pub body: Block<'ast>, + pub else_part: Option>>, } -#[derive(Debug, Clone, PartialEq)] -pub enum ElsePart { - Else(Block, Span), - ElseIf(IfStmt, Span), +#[derive(Debug, PartialEq)] +pub enum ElsePart<'ast> { + Else(Block<'ast>, Span), + ElseIf(IfStmt<'ast>, Span), } -impl ElsePart { +impl ElsePart<'_> { pub fn span(&self) -> Span { match self { ElsePart::Else(_, span) => *span, @@ -79,23 +81,23 @@ impl ElsePart { } } -#[derive(Debug, Clone, PartialEq)] -pub struct WhileStmt { +#[derive(Debug, PartialEq)] +pub struct WhileStmt<'ast> { pub span: Span, - pub cond: Expr, - pub body: Block, + pub cond: Expr<'ast>, + pub body: Block<'ast>, } -#[derive(Debug, Clone, PartialEq)] -pub enum Expr { +#[derive(Debug, PartialEq)] +pub enum Expr<'ast> { Ident(Ident), - Literal(Literal), - UnaryOp(Box), - BinaryOp(Box), - Call(Box), + Literal(Literal<'ast>), + UnaryOp(Box<'ast, UnaryOp<'ast>>), + BinaryOp(Box<'ast, BinaryOp<'ast>>), + Call(Box<'ast, Call<'ast>>), } -impl Expr { +impl Expr<'_> { pub fn span(&self) -> Span { match self { Expr::Literal(lit) => lit.span(), @@ -107,17 +109,17 @@ impl Expr { } } -#[derive(Debug, Clone, PartialEq)] -pub enum Literal { +#[derive(Debug, PartialEq)] +pub enum Literal<'ast> { String(String, Span), Number(f64, Span), - Array(Vec, Span), + Array(Vec<'ast, Expr<'ast>>, Span), Object(Span), Boolean(bool, Span), Null(Span), } -impl Literal { +impl Literal<'_> { pub fn span(&self) -> Span { match self { Literal::String(_, span) => *span, @@ -130,28 +132,28 @@ impl Literal { } } -#[derive(Debug, Clone, PartialEq)] -pub struct UnaryOp { +#[derive(Debug, PartialEq)] +pub struct UnaryOp<'ast> { pub span: Span, - pub expr: Expr, + pub expr: Expr<'ast>, pub kind: UnaryOpKind, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)] pub enum UnaryOpKind { Not, Neg, } -#[derive(Debug, Clone, PartialEq)] -pub struct BinaryOp { +#[derive(Debug, PartialEq)] +pub struct BinaryOp<'ast> { pub span: Span, - pub lhs: Expr, - pub rhs: Expr, + pub lhs: Expr<'ast>, + pub rhs: Expr<'ast>, pub kind: BinaryOpKind, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)] pub enum BinaryOpKind { And, Or, @@ -168,15 +170,15 @@ pub enum BinaryOpKind { Mod, } -#[derive(Debug, Clone, PartialEq)] -pub struct Call { - pub callee: Expr, +#[derive(Debug, PartialEq)] +pub struct Call<'ast> { + pub callee: Expr<'ast>, pub span: Span, - pub kind: CallKind, + pub kind: CallKind<'ast>, } -#[derive(Debug, Clone, PartialEq)] -pub enum CallKind { +#[derive(Debug, PartialEq)] +pub enum CallKind<'ast> { Field(Ident), - Fn(Vec), + Fn(Vec<'ast, Expr<'ast>>), } diff --git a/src/bytecode.rs b/src/bytecode.rs index 7842ad2..5a36efa 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,24 +1,26 @@ use crate::errors::Span; use crate::value::{HashMap, Symbol}; +use bumpalo::boxed::Box; +use bumpalo::collections::Vec; use std::rc::Rc; -#[derive(Debug, Default)] -pub struct FnBlock { - pub code: Vec, - pub stack_sizes: Vec, - pub spans: Vec, +#[derive(Debug)] +pub struct FnBlock<'bc> { + pub code: Vec<'bc, Instr<'bc>>, + pub stack_sizes: Vec<'bc, usize>, + pub spans: Vec<'bc, Span>, pub arity: u8, } // todo: this should be copy in the end tbh #[derive(Debug)] -pub enum Instr { +pub enum Instr<'bc> { /// Store the current value on the stack to the stack location with the local offset `usize` Store(usize), /// Load the variable value from the local offset `usize` onto the stack Load(usize), /// Push a value onto the stack - PushVal(Box), + PushVal(Box<'bc, Value>), /// Negate the top value on the stack. Only works with numbers and booleans Neg, BinAdd, @@ -45,6 +47,6 @@ pub enum Value { Bool(bool), Num(f64), String(Rc), - Array(Vec), + Array, Object(HashMap), } diff --git a/src/compile.rs b/src/compile.rs index 622109e..2fb69a6 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -5,6 +5,9 @@ use crate::ast::{ use crate::bytecode::{FnBlock, Instr, Value}; use crate::errors::{CompilerError, Span}; use crate::value::{HashMap, Symbol}; +use bumpalo::boxed::Box; +use bumpalo::collections::Vec; +use bumpalo::Bump; use std::cell::RefCell; use std::rc::Rc; @@ -39,25 +42,39 @@ impl Env { } } -#[derive(Debug, Default)] -struct Compiler { - blocks: Vec, +#[derive(Debug)] +struct Compiler<'bc> { + blocks: Vec<'bc, FnBlock<'bc>>, current_block: usize, + bump: &'bc Bump, /// the current local variables that are in scope, only needed for compiling env: Rc>, } -pub fn compile(ast: &Program) -> Result, CompileError> { - let mut compiler = Compiler::default(); +pub fn compile<'bc>( + ast: &Program, + bytecode_bump: &'bc Bump, +) -> Result>, CompileError> { + let mut compiler = Compiler { + blocks: Vec::new_in(bytecode_bump), + current_block: 0, + bump: bytecode_bump, + env: Rc::new(RefCell::new(Default::default())), + }; compiler.compile(ast)?; Ok(compiler.blocks) } -impl Compiler { +impl<'bc> Compiler<'bc> { fn compile(&mut self, ast: &Program) -> CResult<()> { - let global_block = FnBlock::default(); + let global_block = FnBlock { + code: Vec::new_in(self.bump), + stack_sizes: Vec::new_in(self.bump), + spans: Vec::new_in(self.bump), + arity: 0, + }; self.blocks.push(global_block); self.current_block = self.blocks.len() - 1; self.compile_stmts(&ast.0)?; @@ -180,7 +197,7 @@ impl Compiler { Literal::Number(num, _) => Value::Num(*num), Literal::Array(vec, _) => { if vec.is_empty() { - Value::Array(Vec::new()) + Value::Array } else { todo!() } @@ -191,7 +208,7 @@ impl Compiler { }; self.push_instr( - Instr::PushVal(Box::new(value)), + Instr::PushVal(Box::new_in(value, self.bump)), StackChange::Grow, lit.span(), ); @@ -244,7 +261,7 @@ impl Compiler { *block.stack_sizes.last().expect("empty stack") - 1 } - fn push_instr(&mut self, instr: Instr, stack_change: StackChange, span: Span) { + fn push_instr(&mut self, instr: Instr<'bc>, stack_change: StackChange, span: Span) { let block = &mut self.blocks[self.current_block]; let stack_top = block.stack_sizes.last().copied().unwrap_or(0); let new_stack_top = stack_top as isize + stack_change as isize; @@ -299,6 +316,5 @@ impl CompilerError for CompileError { } } - #[cfg(test)] -mod test {} \ No newline at end of file +mod test {} diff --git a/src/lex.rs b/src/lex.rs index bb5b32c..6ab3a9b 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -355,7 +355,7 @@ fn is_valid_ident_start(char: char) -> bool { char.is_alphabetic() || char == '_' } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct LexError { pub span: Span, pub kind: LexErrorKind, @@ -399,7 +399,7 @@ impl CompilerError for LexError { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum LexErrorKind { InvalidCharacter(char), InvalidFloat(std::num::ParseFloatError), diff --git a/src/lib.rs b/src/lib.rs index f64ee8b..b632666 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,40 +8,33 @@ mod lex; mod parse; mod value; +use crate::ast::Program; +use bumpalo::Bump; pub use lex::*; pub use parse::*; pub fn run_program(program: &str) { let lexer = lex::Lexer::lex(program); - let (success, errors) = lexer.partition::, _>(|result| result.is_ok()); - if errors.is_empty() { - let tokens = success.into_iter().collect::, _>>().unwrap(); + let ast_alloc = Bump::new(); - println!( - "Tokens:\n{:?}\n", - tokens.iter().map(|token| &token.kind).collect::>() - ); + let ast = parse::parse(lexer, &ast_alloc); - let ast = parse::parse(tokens); - - match ast { - Ok(ast) => { - println!("AST:\n{:?}\n", ast); - - let bytecode = compile::compile(&ast); - - match bytecode { - Ok(code) => println!("Bytecode:\n{:#?}\n", code), - Err(err) => errors::display_error(program, err), - } - } - Err(err) => errors::display_error(program, err), - } - } else { - errors - .into_iter() - .map(Result::unwrap_err) - .for_each(|err| errors::display_error(program, err)); + match ast { + Ok(ast) => process_ast(program, ast), + Err(err) => errors::display_error(program, err), + } +} + +fn process_ast(program: &str, ast: Program) { + println!("AST:\n{:?}\n", ast); + + let bytecode_alloc = Bump::new(); + + let bytecode = compile::compile(&ast, &bytecode_alloc); + + match bytecode { + Ok(code) => println!("Bytecode:\n{:#?}\n", code), + Err(err) => errors::display_error(program, err), } } diff --git a/src/parse.rs b/src/parse.rs index 6480473..5b3f4e0 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -4,25 +4,38 @@ mod test; use crate::ast::*; use crate::errors::{CompilerError, Span}; use crate::lex::{Token, TokenType}; +use crate::LexError; +use bumpalo::boxed::Box; +use bumpalo::collections::Vec; +use bumpalo::Bump; use std::iter::Peekable; -pub fn parse(tokens: Vec) -> Result { - let mut parser = Parser { - tokens: tokens.into_iter().peekable(), - depth: 0, - inside_fn_depth: 0, - inside_loop_depth: 0, - }; - let program = parser.program()?; - Ok(program) -} - #[derive(Debug)] -struct Parser<'code> { - tokens: Peekable>>, +struct Parser<'code, 'ast, I> +where + I: Iterator, LexError>>, + I: 'code, +{ + tokens: Peekable, depth: usize, inside_fn_depth: usize, inside_loop_depth: usize, + bump: &'ast Bump, +} + +pub fn parse<'ast, 'code>( + tokens: impl Iterator, LexError>> + 'code, + ast_bump: &'ast Bump, +) -> Result, ParseErr<'code>> { + let mut parser = Parser { + tokens: tokens.peekable(), + depth: 0, + inside_fn_depth: 0, + inside_loop_depth: 0, + bump: ast_bump, + }; + let program = parser.program()?; + Ok(program) } type ParseResult<'code, T> = Result>; @@ -31,12 +44,15 @@ macro_rules! parse_bin_op { ($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{ let _ = $self.next(); let rhs = $self.$function()?; - Ok(Expr::BinaryOp(Box::new(BinaryOp { - span: $lhs.span().extend(rhs.span()), - lhs: $lhs, - rhs, - kind: $kind, - }))) + Ok(Expr::BinaryOp(Box::new_in( + BinaryOp { + span: $lhs.span().extend(rhs.span()), + lhs: $lhs, + rhs, + kind: $kind, + }, + $self.bump, + ))) }}; } @@ -56,26 +72,30 @@ macro_rules! enter_parse { }; } -impl<'code> Parser<'code> { +impl<'code, 'ast, I> Parser<'code, 'ast, I> +where + I: Iterator, LexError>>, + I: 'code, +{ const MAX_DEPTH: usize = 100; - fn program(&mut self) -> ParseResult<'code, Program> { + fn program(&mut self) -> ParseResult<'code, Program<'ast>> { Ok(Program(self.statement_list()?)) } fn too_nested_error(&mut self) -> ParseResult<'code, ()> { - let next_token = self.next(); + let next_token = self.next()?; match next_token { Some(token) => Err(ParseErr::MaxDepth(token.span)), None => Err(ParseErr::Eof("reached EOF while being nested to deeply")), } } - fn statement_list(&mut self) -> ParseResult<'code, Vec> { + fn statement_list(&mut self) -> ParseResult<'code, Vec<'ast, Stmt<'ast>>> { enter_parse!(self); - let mut stmts = Vec::new(); + let mut stmts = Vec::new_in(self.bump); let return_stmts = loop { - if let Some(TokenType::BraceC) | None = self.peek_kind() { + if let Some(TokenType::BraceC) | None = self.peek_kind()? { break Ok(stmts); } let stmt = self.statement()?; @@ -85,7 +105,7 @@ impl<'code> Parser<'code> { return_stmts } - fn block(&mut self) -> ParseResult<'code, Block> { + fn block(&mut self) -> ParseResult<'code, Block<'ast>> { enter_parse!(self); let start_span = self.expect(TokenType::BraceO)?.span; @@ -100,10 +120,10 @@ impl<'code> Parser<'code> { }) } - fn statement(&mut self) -> ParseResult<'code, Stmt> { + fn statement(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); - let stmt = match *self.peek_kind().ok_or(ParseErr::Eof("statement"))? { + let stmt = match *self.peek_kind()?.ok_or(ParseErr::Eof("statement"))? { TokenType::Let => self.declaration(), TokenType::Fn => self.fn_decl(), TokenType::If => Ok(Stmt::If(self.if_stmt()?)), @@ -122,7 +142,7 @@ impl<'code> Parser<'code> { stmt } - fn declaration(&mut self) -> ParseResult<'code, Stmt> { + fn declaration(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Let)?.span; @@ -140,7 +160,7 @@ impl<'code> Parser<'code> { })) } - fn fn_decl(&mut self) -> ParseResult<'code, Stmt> { + fn fn_decl(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Fn)?.span; @@ -161,7 +181,7 @@ impl<'code> Parser<'code> { })) } - fn fn_args(&mut self) -> ParseResult<'code, Vec> { + fn fn_args(&mut self) -> ParseResult<'code, Vec<'ast, Ident>> { enter_parse!(self); self.expect(TokenType::ParenO)?; @@ -173,14 +193,14 @@ impl<'code> Parser<'code> { Ok(params) } - fn if_stmt(&mut self) -> ParseResult<'code, IfStmt> { + fn if_stmt(&mut self) -> ParseResult<'code, IfStmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::If)?.span; let cond = self.expression()?; let body = self.block()?; - let else_part = if let Some(TokenType::Else) = self.peek_kind() { + let else_part = if let Some(TokenType::Else) = self.peek_kind()? { Some(self.else_part()?) } else { None @@ -194,16 +214,16 @@ impl<'code> Parser<'code> { .option_extend(else_part.as_ref().map(|part| part.span())), cond, body, - else_part: else_part.map(Box::new), + else_part: else_part.map(|part| Box::new_in(part, self.bump)), }) } - fn else_part(&mut self) -> ParseResult<'code, ElsePart> { + fn else_part(&mut self) -> ParseResult<'code, ElsePart<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Else)?.span; - let else_part = if let Some(TokenType::If) = self.peek_kind() { + let else_part = if let Some(TokenType::If) = self.peek_kind()? { let else_if_stmt = self.if_stmt()?; let else_span = keyword_span.extend(else_if_stmt.span); Ok(ElsePart::ElseIf(else_if_stmt, else_span)) @@ -218,7 +238,7 @@ impl<'code> Parser<'code> { else_part } - fn loop_stmt(&mut self) -> ParseResult<'code, Stmt> { + fn loop_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Loop)?.span; @@ -234,7 +254,7 @@ impl<'code> Parser<'code> { Ok(Stmt::Loop(block, keyword_span.extend(loop_span))) } - fn while_stmt(&mut self) -> ParseResult<'code, Stmt> { + fn while_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::While)?.span; @@ -253,7 +273,7 @@ impl<'code> Parser<'code> { })) } - fn break_stmt(&mut self) -> ParseResult<'code, Stmt> { + fn break_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Break)?.span; @@ -268,12 +288,12 @@ impl<'code> Parser<'code> { } } - fn return_stmt(&mut self) -> ParseResult<'code, Stmt> { + fn return_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let keyword_span = self.expect(TokenType::Return)?.span; - let expr = if let Some(TokenType::Semi) = self.peek_kind() { + let expr = if let Some(TokenType::Semi) = self.peek_kind()? { None } else { Some(self.expression()?) @@ -292,7 +312,7 @@ impl<'code> Parser<'code> { } } - fn print_stmt(&mut self) -> ParseResult<'code, Stmt> { + fn print_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let print_span = self.expect(TokenType::Print)?.span; @@ -306,12 +326,12 @@ impl<'code> Parser<'code> { Ok(Stmt::Print(expr, print_span.extend(semi_span))) } - fn assignment(&mut self) -> ParseResult<'code, Stmt> { + fn assignment(&mut self) -> ParseResult<'code, Stmt<'ast>> { enter_parse!(self); let expr = self.expression()?; - let stmt = if let Some(TokenType::Equal) = self.peek_kind() { + let stmt = if let Some(TokenType::Equal) = self.peek_kind()? { let _ = self.expect(TokenType::Equal)?; let init = self.expression()?; let semi_span = self.expect(TokenType::Semi)?.span; @@ -329,18 +349,18 @@ impl<'code> Parser<'code> { stmt } - fn expression(&mut self) -> ParseResult<'code, Expr> { + fn expression(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let return_expr = self.logical_or(); exit_parse!(self); return_expr } - fn logical_or(&mut self) -> ParseResult<'code, Expr> { + fn logical_or(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.logical_and()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_or), _ => Ok(lhs), }; @@ -349,11 +369,11 @@ impl<'code> Parser<'code> { return_expr } - fn logical_and(&mut self) -> ParseResult<'code, Expr> { + fn logical_and(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.equality()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, logical_and), _ => Ok(lhs), }; @@ -362,11 +382,11 @@ impl<'code> Parser<'code> { return_expr } - fn equality(&mut self) -> ParseResult<'code, Expr> { + fn equality(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.comparison()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::BangEqual) => { parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison) } @@ -379,11 +399,11 @@ impl<'code> Parser<'code> { return_expr } - fn comparison(&mut self) -> ParseResult<'code, Expr> { + fn comparison(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.term()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term), Some(TokenType::GreaterEqual) => { parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term) @@ -398,11 +418,11 @@ impl<'code> Parser<'code> { return_expr } - fn term(&mut self) -> ParseResult<'code, Expr> { + fn term(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.factor()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, term), Some(TokenType::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, term), _ => Ok(lhs), @@ -411,11 +431,11 @@ impl<'code> Parser<'code> { return_expr } - fn factor(&mut self) -> ParseResult<'code, Expr> { + fn factor(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let lhs = self.unary()?; - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, factor), Some(TokenType::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, factor), Some(TokenType::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, factor), @@ -425,27 +445,33 @@ impl<'code> Parser<'code> { return_expr } - fn unary(&mut self) -> ParseResult<'code, Expr> { + fn unary(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); - let return_expr = match self.peek_kind() { + let return_expr = match self.peek_kind()? { Some(TokenType::Not) => { - let unary_op_span = self.next().unwrap().span; + let unary_op_span = self.next()?.unwrap().span; let expr = self.call()?; - Ok(Expr::UnaryOp(Box::new(UnaryOp { - span: unary_op_span.extend(expr.span()), - expr, - kind: UnaryOpKind::Not, - }))) + Ok(Expr::UnaryOp(Box::new_in( + UnaryOp { + span: unary_op_span.extend(expr.span()), + expr, + kind: UnaryOpKind::Not, + }, + self.bump, + ))) } Some(TokenType::Minus) => { - let unary_op_span = self.next().unwrap().span; + let unary_op_span = self.next()?.unwrap().span; let expr = self.call()?; - Ok(Expr::UnaryOp(Box::new(UnaryOp { - span: unary_op_span.extend(expr.span()), - expr, - kind: UnaryOpKind::Neg, - }))) + Ok(Expr::UnaryOp(Box::new_in( + UnaryOp { + span: unary_op_span.extend(expr.span()), + expr, + kind: UnaryOpKind::Neg, + }, + self.bump, + ))) } _ => self.call(), }; @@ -453,33 +479,39 @@ impl<'code> Parser<'code> { return_expr } - fn call(&mut self) -> ParseResult<'code, Expr> { + fn call(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let mut expr = self.primary()?; loop { - expr = match self.peek_kind() { + expr = match self.peek_kind()? { Some(TokenType::ParenO) => { let open_span = self.expect(TokenType::ParenO)?.span; let args = self.parse_list(TokenType::ParenC, Self::expression)?; let close_span = self.expect(TokenType::ParenC)?.span; - Expr::Call(Box::new(Call { - callee: expr, - span: open_span.extend(close_span), - kind: CallKind::Fn(args), - })) + Expr::Call(Box::new_in( + Call { + callee: expr, + span: open_span.extend(close_span), + kind: CallKind::Fn(args), + }, + self.bump, + )) } Some(TokenType::Dot) => { let dot_span = self.expect(TokenType::Dot)?.span; let field = self.ident()?; - Expr::Call(Box::new(Call { - callee: expr, - span: dot_span.extend(field.span), - kind: CallKind::Field(field), - })) + Expr::Call(Box::new_in( + Call { + callee: expr, + span: dot_span.extend(field.span), + kind: CallKind::Field(field), + }, + self.bump, + )) } _ => break, } @@ -490,10 +522,10 @@ impl<'code> Parser<'code> { Ok(expr) } - fn primary(&mut self) -> ParseResult<'code, Expr> { + fn primary(&mut self) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); - let next = self.next().ok_or(ParseErr::Eof("primary"))?; + let next = self.next()?.ok_or(ParseErr::Eof("primary"))?; let return_expr = match next.kind { TokenType::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))), TokenType::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), @@ -523,7 +555,7 @@ impl<'code> Parser<'code> { fn ident(&mut self) -> ParseResult<'code, Ident> { enter_parse!(self); - let Token { kind, span } = self.next().ok_or(ParseErr::Eof("identifier"))?; + let Token { kind, span } = self.next()?.ok_or(ParseErr::Eof("identifier"))?; let return_expr = match kind { TokenType::Ident(name) => { let name_owned = name.to_owned(); @@ -543,7 +575,7 @@ impl<'code> Parser<'code> { return_expr } - fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { + fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let close_span = self.expect(TokenType::BraceC)?.span; @@ -552,7 +584,7 @@ impl<'code> Parser<'code> { Ok(Expr::Literal(Literal::Object(open_span.extend(close_span)))) } - fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { + fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr<'ast>> { enter_parse!(self); let elements = self.parse_list(TokenType::BracketC, Self::expression)?; @@ -570,15 +602,15 @@ impl<'code> Parser<'code> { &mut self, close: TokenType<'code>, mut parser: F, - ) -> ParseResult<'code, Vec> + ) -> ParseResult<'code, Vec<'ast, T>> where F: FnMut(&mut Self) -> ParseResult<'code, T>, { enter_parse!(self); - let mut elements = Vec::new(); + let mut elements = Vec::new_in(self.bump); - if self.peek_kind() == Some(&close) { + if self.peek_kind()? == Some(&close) { return Ok(elements); } @@ -586,14 +618,14 @@ impl<'code> Parser<'code> { elements.push(expr); while self - .peek_kind() + .peek_kind()? .ok_or_else(|| ParseErr::EofExpecting(close.clone()))? != &close { self.expect(TokenType::Comma)?; // trailing comma support - if self.peek_kind() == Some(&close) { + if self.peek_kind()? == Some(&close) { break; } @@ -607,23 +639,28 @@ impl<'code> Parser<'code> { // token helpers - #[must_use] - fn next(&mut self) -> Option> { - self.tokens.next() + fn next(&mut self) -> ParseResult<'code, Option>> { + match self.tokens.next() { + Some(Ok(t)) => Ok(Some(t)), + Some(Err(t)) => Err(t.into()), + None => Ok(None), + } } - #[must_use] - fn peek(&mut self) -> Option<&Token<'code>> { - self.tokens.peek() + fn peek(&mut self) -> ParseResult<'code, Option<&Token<'code>>> { + match self.tokens.peek() { + Some(Ok(t)) => Ok(Some(t)), + Some(Err(t)) => Err(t.clone().into()), + None => Ok(None), + } } - #[must_use] - fn peek_kind(&mut self) -> Option<&TokenType<'code>> { - self.peek().map(|token| &token.kind) + fn peek_kind(&mut self) -> ParseResult<'code, Option<&TokenType<'code>>> { + self.peek().map(|option| option.map(|token| &token.kind)) } fn expect(&mut self, kind: TokenType<'code>) -> ParseResult<'code, Token> { - if let Some(token) = self.next() { + if let Some(token) = self.next()? { if token.kind == kind { Ok(token) } else { @@ -650,6 +687,14 @@ pub enum ParseErr<'code> { InvalidTokenPrimary(Token<'code>), EofExpecting(TokenType<'code>), Eof(&'static str), + LexError(LexError), +} + +// todo: unify error handling +impl From for ParseErr<'_> { + fn from(err: LexError) -> Self { + Self::LexError(err) + } } impl CompilerError for ParseErr<'_> { @@ -665,6 +710,7 @@ impl CompilerError for ParseErr<'_> { ParseErr::BreakOutsideLoop(span) => *span, ParseErr::ReturnOutsideFunction(span) => *span, ParseErr::MaxDepth(span) => *span, + ParseErr::LexError(err) => err.span, } } @@ -685,10 +731,14 @@ impl CompilerError for ParseErr<'_> { ParseErr::BreakOutsideLoop(_) => "break used outside of loop".to_string(), ParseErr::ReturnOutsideFunction(_) => "return used outside of function".to_string(), ParseErr::MaxDepth(_) => "reached maximal nesting depth".to_string(), + ParseErr::LexError(err) => err.message(), } } fn note(&self) -> Option { - None + match self { + ParseErr::LexError(err) => err.note(), + _ => None, + } } }