use error tokens in parser

This commit is contained in:
nora 2021-12-30 16:46:41 +01:00
parent 11b735d728
commit 62e4ffac6c
4 changed files with 243 additions and 283 deletions

View file

@ -12,24 +12,24 @@ use std::str::CharIndices;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Token<'code> { pub struct Token<'code> {
pub span: Span, pub span: Span,
pub kind: TokenType<'code>, pub kind: TokenKind<'code>,
} }
impl<'code> Token<'code> { impl<'code> Token<'code> {
fn single_span(start: usize, kind: TokenType<'code>) -> Token<'code> { fn single_span(start: usize, kind: TokenKind<'code>) -> Token<'code> {
Self { Self {
span: Span::single(start), span: Span::single(start),
kind, kind,
} }
} }
fn new(span: Span, kind: TokenType<'code>) -> Token<'code> { fn new(span: Span, kind: TokenKind<'code>) -> Token<'code> {
Self { span, kind } Self { span, kind }
} }
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum TokenType<'code> { pub enum TokenKind<'code> {
// keywords // keywords
Let, Let,
Print, Print,
@ -124,8 +124,8 @@ impl<'code> Lexer<'code> {
fn maybe_next_char<'a>( fn maybe_next_char<'a>(
&mut self, &mut self,
expect_char: char, expect_char: char,
true_type: TokenType<'a>, true_type: TokenKind<'a>,
false_type: TokenType<'a>, false_type: TokenKind<'a>,
start: usize, start: usize,
) -> Token<'a> { ) -> Token<'a> {
if self.expect(expect_char) { if self.expect(expect_char) {
@ -170,36 +170,36 @@ impl<'code> Iterator for Lexer<'code> {
} }
} }
} }
';' => break Token::single_span(start, TokenType::Semi), ';' => break Token::single_span(start, TokenKind::Semi),
'+' => break Token::single_span(start, TokenType::Plus), '+' => break Token::single_span(start, TokenKind::Plus),
'-' => break Token::single_span(start, TokenType::Minus), '-' => break Token::single_span(start, TokenKind::Minus),
'*' => break Token::single_span(start, TokenType::Asterisk), '*' => break Token::single_span(start, TokenKind::Asterisk),
'/' => break Token::single_span(start, TokenType::Slash), '/' => break Token::single_span(start, TokenKind::Slash),
'%' => break Token::single_span(start, TokenType::Percent), '%' => break Token::single_span(start, TokenKind::Percent),
'{' => break Token::single_span(start, TokenType::BraceO), '{' => break Token::single_span(start, TokenKind::BraceO),
'}' => break Token::single_span(start, TokenType::BraceC), '}' => break Token::single_span(start, TokenKind::BraceC),
'[' => break Token::single_span(start, TokenType::BracketO), '[' => break Token::single_span(start, TokenKind::BracketO),
']' => break Token::single_span(start, TokenType::BracketC), ']' => break Token::single_span(start, TokenKind::BracketC),
'(' => break Token::single_span(start, TokenType::ParenO), '(' => break Token::single_span(start, TokenKind::ParenO),
')' => break Token::single_span(start, TokenType::ParenC), ')' => break Token::single_span(start, TokenKind::ParenC),
'.' => break Token::single_span(start, TokenType::Dot), '.' => break Token::single_span(start, TokenKind::Dot),
',' => break Token::single_span(start, TokenType::Comma), ',' => break Token::single_span(start, TokenKind::Comma),
'=' => { '=' => {
break self.maybe_next_char( break self.maybe_next_char(
'=', '=',
TokenType::EqualEqual, TokenKind::EqualEqual,
TokenType::Equal, TokenKind::Equal,
start, start,
); );
} }
'!' => { '!' => {
break if self.expect('=') { break if self.expect('=') {
let _ = self.code.next(); // consume =; let _ = self.code.next(); // consume =;
Token::new(Span::start_len(start, start + 2), TokenType::BangEqual) Token::new(Span::start_len(start, start + 2), TokenKind::BangEqual)
} else { } else {
Token::new( Token::new(
Span::single(start), Span::single(start),
TokenType::Error(CompilerError::with_note( TokenKind::Error(CompilerError::with_note(
Span::single(start), Span::single(start),
"Expected '=' after '!'".to_string(), "Expected '=' after '!'".to_string(),
"If you meant to use it for negation, use `not`".to_string(), "If you meant to use it for negation, use `not`".to_string(),
@ -210,13 +210,13 @@ impl<'code> Iterator for Lexer<'code> {
'>' => { '>' => {
break self.maybe_next_char( break self.maybe_next_char(
'=', '=',
TokenType::GreaterEqual, TokenKind::GreaterEqual,
TokenType::Greater, TokenKind::Greater,
start, start,
); );
} }
'<' => { '<' => {
break self.maybe_next_char('=', TokenType::LessEqual, TokenType::Less, start); break self.maybe_next_char('=', TokenKind::LessEqual, TokenKind::Less, start);
} }
'"' => { '"' => {
let mut buffer = String::new(); let mut buffer = String::new();
@ -232,7 +232,7 @@ impl<'code> Iterator for Lexer<'code> {
None => { None => {
return Some(Token::new( return Some(Token::new(
Span::single(start), Span::single(start),
TokenType::Error(CompilerError::with_note( TokenKind::Error(CompilerError::with_note(
Span::single(start), // no not show the whole literal, this does not make sense Span::single(start), // no not show the whole literal, this does not make sense
"String literal not closed".to_string(), "String literal not closed".to_string(),
"Close the literal using '\"'".to_string(), "Close the literal using '\"'".to_string(),
@ -241,7 +241,7 @@ impl<'code> Iterator for Lexer<'code> {
} }
} }
}; };
break Token::new(Span::start_end(start, end), TokenType::String(buffer)); break Token::new(Span::start_end(start, end), TokenKind::String(buffer));
} }
char => { char => {
if char.is_ascii_digit() { if char.is_ascii_digit() {
@ -265,15 +265,15 @@ impl<'code> Iterator for Lexer<'code> {
let number = number_str.parse::<f64>(); let number = number_str.parse::<f64>();
break match number { break match number {
Ok(number) if number.is_infinite() => { Ok(number) if number.is_infinite() => {
Token::new(span, TokenType::Error(CompilerError::with_note( Token::new(span, TokenKind::Error(CompilerError::with_note(
span, span,
"Number literal too long".to_string(), "Number literal too long".to_string(),
"A number literal cannot be larger than a 64 bit float can represent" "A number literal cannot be larger than a 64 bit float can represent"
.to_string(), .to_string(),
))) )))
} }
Ok(number) => Token::new(span, TokenType::Number(number)), Ok(number) => Token::new(span, TokenKind::Number(number)),
Err(err) => Token::new(span, TokenType::Error(CompilerError::with_note( Err(err) => Token::new(span, TokenKind::Error(CompilerError::with_note(
span, span,
"Invalid number".to_string(), "Invalid number".to_string(),
err.to_string(), err.to_string(),
@ -297,7 +297,7 @@ impl<'code> Iterator for Lexer<'code> {
} else { } else {
break Token::new( break Token::new(
Span::single(start), Span::single(start),
TokenType::Error(CompilerError::with_note( TokenKind::Error(CompilerError::with_note(
Span::single(start), Span::single(start),
format!("Unexpected character: '{}'", char), format!("Unexpected character: '{}'", char),
"Character is not allowed outside of string literals and comments" "Character is not allowed outside of string literals and comments"
@ -313,25 +313,25 @@ impl<'code> Iterator for Lexer<'code> {
} }
} }
fn keyword_or_ident(name: &str) -> TokenType { fn keyword_or_ident(name: &str) -> TokenKind {
match name { match name {
"loop" => TokenType::Loop, "loop" => TokenKind::Loop,
"let" => TokenType::Let, "let" => TokenKind::Let,
"fn" => TokenType::Fn, "fn" => TokenKind::Fn,
"for" => TokenType::For, "for" => TokenKind::For,
"false" => TokenType::False, "false" => TokenKind::False,
"if" => TokenType::If, "if" => TokenKind::If,
"else" => TokenType::Else, "else" => TokenKind::Else,
"while" => TokenType::While, "while" => TokenKind::While,
"break" => TokenType::Break, "break" => TokenKind::Break,
"return" => TokenType::Return, "return" => TokenKind::Return,
"true" => TokenType::True, "true" => TokenKind::True,
"null" => TokenType::Null, "null" => TokenKind::Null,
"not" => TokenType::Not, "not" => TokenKind::Not,
"and" => TokenType::And, "and" => TokenKind::And,
"or" => TokenType::Or, "or" => TokenKind::Or,
"print" => TokenType::Print, "print" => TokenKind::Print,
_ => TokenType::Ident(name), _ => TokenKind::Ident(name),
} }
} }
@ -346,16 +346,16 @@ fn is_valid_ident_start(char: char) -> bool {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::lex::Lexer; use crate::lex::Lexer;
use crate::lex::TokenType::{self, *}; use crate::lex::TokenKind::{self, *};
type StdString = std::string::String; type StdString = std::string::String;
fn lex_types(str: &str) -> Vec<TokenType> { fn lex_types(str: &str) -> Vec<TokenKind> {
let lexer = Lexer::new(str); let lexer = Lexer::new(str);
lexer.map(|token| token.kind).collect::<Vec<_>>() lexer.map(|token| token.kind).collect::<Vec<_>>()
} }
fn lex_test(code: &str, expected: Vec<TokenType>) { fn lex_test(code: &str, expected: Vec<TokenKind>) {
assert_eq!(lex_types(code), expected) assert_eq!(lex_types(code), expected)
} }
@ -559,7 +559,7 @@ mod test {
.iter() .iter()
.map(|word| format!("{} ", word)) .map(|word| format!("{} ", word))
.collect::<StdString>(); .collect::<StdString>();
let expected = words.map(TokenType::Ident).to_vec(); let expected = words.map(TokenKind::Ident).to_vec();
lex_test(&sentences, expected) lex_test(&sentences, expected)
} }

View file

@ -18,13 +18,7 @@ pub fn run_program(program: &str) {
let ast_alloc = Bump::new(); let ast_alloc = Bump::new();
let lexer = lex::Lexer::new(program); let lexer = lex::Lexer::new(program);
let ast = parse::parse( let ast = parse::parse(lexer, &ast_alloc);
lexer.map(|token| match &token.kind {
TokenType::Error(err) => Err(err.clone()),
_ => Ok(token),
}),
&ast_alloc,
);
match ast { match ast {
Ok(ast) => process_ast(program, ast), Ok(ast) => process_ast(program, ast),

View file

@ -3,7 +3,7 @@ mod test;
use crate::ast::*; use crate::ast::*;
use crate::errors::{CompilerError, Span}; use crate::errors::{CompilerError, Span};
use crate::lex::{Token, TokenType}; use crate::lex::{Token, TokenKind};
use bumpalo::boxed::Box; use bumpalo::boxed::Box;
use bumpalo::collections::Vec; use bumpalo::collections::Vec;
use bumpalo::Bump; use bumpalo::Bump;
@ -12,7 +12,7 @@ use std::iter::Peekable;
#[derive(Debug)] #[derive(Debug)]
struct Parser<'code, 'ast, I> struct Parser<'code, 'ast, I>
where where
I: Iterator<Item = Result<Token<'code>, CompilerError>>, I: Iterator<Item = Token<'code>>,
I: 'code, I: 'code,
{ {
tokens: Peekable<I>, tokens: Peekable<I>,
@ -23,7 +23,7 @@ where
} }
pub fn parse<'ast, 'code>( pub fn parse<'ast, 'code>(
tokens: impl Iterator<Item = Result<Token<'code>, CompilerError>> + 'code, tokens: impl Iterator<Item = Token<'code>> + 'code,
ast_bump: &'ast Bump, ast_bump: &'ast Bump,
) -> Result<Program<'ast>, CompilerError> { ) -> Result<Program<'ast>, CompilerError> {
let mut parser = Parser { let mut parser = Parser {
@ -37,7 +37,7 @@ pub fn parse<'ast, 'code>(
Ok(program) Ok(program)
} }
type ParseResult<'code, T> = Result<T, ParseErr<'code>>; type ParseResult<T> = Result<T, CompilerError>;
macro_rules! parse_bin_op { macro_rules! parse_bin_op {
($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{ ($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{
@ -73,28 +73,33 @@ macro_rules! enter_parse {
impl<'code, 'ast, I> Parser<'code, 'ast, I> impl<'code, 'ast, I> Parser<'code, 'ast, I>
where where
I: Iterator<Item = Result<Token<'code>, CompilerError>>, I: Iterator<Item = Token<'code>>,
I: 'code, I: 'code,
{ {
const MAX_DEPTH: usize = 100; const MAX_DEPTH: usize = 100;
fn program(&mut self) -> ParseResult<'code, Program<'ast>> { fn program(&mut self) -> ParseResult<Program<'ast>> {
Ok(Program(self.statement_list()?)) Ok(Program(self.statement_list()?))
} }
fn too_nested_error(&mut self) -> ParseResult<'code, ()> { fn too_nested_error(&mut self) -> ParseResult<()> {
let next_token = self.next()?; let next_token = self.next();
match next_token { match next_token {
Some(token) => Err(ParseErr::MaxDepth(token.span)), Some(token) => Err(CompilerError::new(
None => Err(ParseErr::Eof("reached EOF while being nested to deeply")), token.span,
"reached maximal nesting depth".to_string(),
)),
None => Err(CompilerError::eof(
"reached EOF while being nested to deeply",
)),
} }
} }
fn statement_list(&mut self) -> ParseResult<'code, Vec<'ast, Stmt<'ast>>> { fn statement_list(&mut self) -> ParseResult<Vec<'ast, Stmt<'ast>>> {
enter_parse!(self); enter_parse!(self);
let mut stmts = Vec::new_in(self.bump); let mut stmts = Vec::new_in(self.bump);
let return_stmts = loop { let return_stmts = loop {
if let Some(TokenType::BraceC) | None = self.peek_kind()? { if let Some(TokenKind::BraceC) | None = self.peek_kind() {
break Ok(stmts); break Ok(stmts);
} }
let stmt = self.statement()?; let stmt = self.statement()?;
@ -104,12 +109,12 @@ where
return_stmts return_stmts
} }
fn block(&mut self) -> ParseResult<'code, Block<'ast>> { fn block(&mut self) -> ParseResult<Block<'ast>> {
enter_parse!(self); enter_parse!(self);
let start_span = self.expect(TokenType::BraceO)?.span; let start_span = self.expect(TokenKind::BraceO)?.span;
let stmts = self.statement_list()?; let stmts = self.statement_list()?;
let end_span = self.expect(TokenType::BraceC)?.span; let end_span = self.expect(TokenKind::BraceC)?.span;
exit_parse!(self); exit_parse!(self);
@ -119,19 +124,22 @@ where
}) })
} }
fn statement(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn statement(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let stmt = match *self.peek_kind()?.ok_or(ParseErr::Eof("statement"))? { let stmt = match *self
TokenType::Let => self.declaration(), .peek_kind()
TokenType::Fn => self.fn_decl(), .ok_or_else(|| CompilerError::eof("statement"))?
TokenType::If => Ok(Stmt::If(self.if_stmt()?)), {
TokenType::Loop => self.loop_stmt(), TokenKind::Let => self.declaration(),
TokenType::While => self.while_stmt(), TokenKind::Fn => self.fn_decl(),
TokenType::Break => self.break_stmt(), TokenKind::If => Ok(Stmt::If(self.if_stmt()?)),
TokenType::Return => self.return_stmt(), TokenKind::Loop => self.loop_stmt(),
TokenType::Print => self.print_stmt(), TokenKind::While => self.while_stmt(),
TokenType::BraceO => Ok(Stmt::Block(self.block()?)), TokenKind::Break => self.break_stmt(),
TokenKind::Return => self.return_stmt(),
TokenKind::Print => self.print_stmt(),
TokenKind::BraceO => Ok(Stmt::Block(self.block()?)),
_ => { _ => {
let stmt = self.assignment()?; let stmt = self.assignment()?;
Ok(stmt) Ok(stmt)
@ -141,14 +149,14 @@ where
stmt stmt
} }
fn declaration(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn declaration(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Let)?.span; let keyword_span = self.expect(TokenKind::Let)?.span;
let name = self.ident()?; let name = self.ident()?;
self.expect(TokenType::Equal)?; self.expect(TokenKind::Equal)?;
let init = self.expression()?; let init = self.expression()?;
self.expect(TokenType::Semi)?; self.expect(TokenKind::Semi)?;
exit_parse!(self); exit_parse!(self);
@ -159,10 +167,10 @@ where
})) }))
} }
fn fn_decl(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn fn_decl(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Fn)?.span; let keyword_span = self.expect(TokenKind::Fn)?.span;
let name = self.ident()?; let name = self.ident()?;
let args = self.fn_args()?; let args = self.fn_args()?;
@ -180,26 +188,26 @@ where
})) }))
} }
fn fn_args(&mut self) -> ParseResult<'code, Vec<'ast, Ident>> { fn fn_args(&mut self) -> ParseResult<Vec<'ast, Ident>> {
enter_parse!(self); enter_parse!(self);
self.expect(TokenType::ParenO)?; self.expect(TokenKind::ParenO)?;
let params = self.parse_list(TokenType::ParenC, Self::ident)?; let params = self.parse_list(TokenKind::ParenC, Self::ident)?;
self.expect(TokenType::ParenC)?; self.expect(TokenKind::ParenC)?;
exit_parse!(self); exit_parse!(self);
Ok(params) Ok(params)
} }
fn if_stmt(&mut self) -> ParseResult<'code, IfStmt<'ast>> { fn if_stmt(&mut self) -> ParseResult<IfStmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::If)?.span; let keyword_span = self.expect(TokenKind::If)?.span;
let cond = self.expression()?; let cond = self.expression()?;
let body = self.block()?; let body = self.block()?;
let else_part = if let Some(TokenType::Else) = self.peek_kind()? { let else_part = if let Some(TokenKind::Else) = self.peek_kind() {
Some(self.else_part()?) Some(self.else_part()?)
} else { } else {
None None
@ -217,12 +225,12 @@ where
}) })
} }
fn else_part(&mut self) -> ParseResult<'code, ElsePart<'ast>> { fn else_part(&mut self) -> ParseResult<ElsePart<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Else)?.span; let keyword_span = self.expect(TokenKind::Else)?.span;
let else_part = if let Some(TokenType::If) = self.peek_kind()? { let else_part = if let Some(TokenKind::If) = self.peek_kind() {
let else_if_stmt = self.if_stmt()?; let else_if_stmt = self.if_stmt()?;
let else_span = keyword_span.extend(else_if_stmt.span); let else_span = keyword_span.extend(else_if_stmt.span);
Ok(ElsePart::ElseIf(else_if_stmt, else_span)) Ok(ElsePart::ElseIf(else_if_stmt, else_span))
@ -237,10 +245,10 @@ where
else_part else_part
} }
fn loop_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn loop_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Loop)?.span; let keyword_span = self.expect(TokenKind::Loop)?.span;
self.inside_loop_depth += 1; self.inside_loop_depth += 1;
let block = self.block()?; let block = self.block()?;
@ -253,10 +261,10 @@ where
Ok(Stmt::Loop(block, keyword_span.extend(loop_span))) Ok(Stmt::Loop(block, keyword_span.extend(loop_span)))
} }
fn while_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn while_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::While)?.span; let keyword_span = self.expect(TokenKind::While)?.span;
let cond = self.expression()?; let cond = self.expression()?;
self.inside_loop_depth += 1; self.inside_loop_depth += 1;
@ -272,75 +280,79 @@ where
})) }))
} }
fn break_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn break_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Break)?.span; let keyword_span = self.expect(TokenKind::Break)?.span;
let semi_span = self.expect(TokenType::Semi)?.span; let semi_span = self.expect(TokenKind::Semi)?.span;
exit_parse!(self); exit_parse!(self);
if self.inside_loop_depth == 0 { if self.inside_loop_depth == 0 {
Err(ParseErr::BreakOutsideLoop(keyword_span.extend(semi_span))) Err(CompilerError::new(
keyword_span.extend(semi_span),
"break used outside of loop".to_string(),
))
} else { } else {
Ok(Stmt::Break(keyword_span.extend(semi_span))) Ok(Stmt::Break(keyword_span.extend(semi_span)))
} }
} }
fn return_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn return_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let keyword_span = self.expect(TokenType::Return)?.span; let keyword_span = self.expect(TokenKind::Return)?.span;
let expr = if let Some(TokenType::Semi) = self.peek_kind()? { let expr = if let Some(TokenKind::Semi) = self.peek_kind() {
None None
} else { } else {
Some(self.expression()?) Some(self.expression()?)
}; };
let semi_span = self.expect(TokenType::Semi)?.span; let semi_span = self.expect(TokenKind::Semi)?.span;
exit_parse!(self); exit_parse!(self);
if self.inside_fn_depth == 0 { if self.inside_fn_depth == 0 {
Err(ParseErr::ReturnOutsideFunction( Err(CompilerError::new(
keyword_span.extend(semi_span), keyword_span.extend(semi_span),
"return used outside of function".to_string(),
)) ))
} else { } else {
Ok(Stmt::Return(expr, keyword_span.extend(semi_span))) Ok(Stmt::Return(expr, keyword_span.extend(semi_span)))
} }
} }
fn print_stmt(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn print_stmt(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let print_span = self.expect(TokenType::Print)?.span; let print_span = self.expect(TokenKind::Print)?.span;
let expr = self.expression()?; let expr = self.expression()?;
let semi_span = self.expect(TokenType::Semi)?.span; let semi_span = self.expect(TokenKind::Semi)?.span;
exit_parse!(self); exit_parse!(self);
Ok(Stmt::Print(expr, print_span.extend(semi_span))) Ok(Stmt::Print(expr, print_span.extend(semi_span)))
} }
fn assignment(&mut self) -> ParseResult<'code, Stmt<'ast>> { fn assignment(&mut self) -> ParseResult<Stmt<'ast>> {
enter_parse!(self); enter_parse!(self);
let expr = self.expression()?; let expr = self.expression()?;
let stmt = if let Some(TokenType::Equal) = self.peek_kind()? { let stmt = if let Some(TokenKind::Equal) = self.peek_kind() {
let _ = self.expect(TokenType::Equal)?; let _ = self.expect(TokenKind::Equal)?;
let init = self.expression()?; let init = self.expression()?;
let semi_span = self.expect(TokenType::Semi)?.span; let semi_span = self.expect(TokenKind::Semi)?.span;
Ok(Stmt::Assignment(Assignment { Ok(Stmt::Assignment(Assignment {
span: expr.span().extend(semi_span), span: expr.span().extend(semi_span),
lhs: expr, lhs: expr,
rhs: init, rhs: init,
})) }))
} else { } else {
let _ = self.expect(TokenType::Semi)?; let _ = self.expect(TokenKind::Semi)?;
Ok(Stmt::Expr(expr)) Ok(Stmt::Expr(expr))
}; };
@ -348,19 +360,19 @@ where
stmt stmt
} }
fn expression(&mut self) -> ParseResult<'code, Expr<'ast>> { fn expression(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let return_expr = self.logical_or(); let return_expr = self.logical_or();
exit_parse!(self); exit_parse!(self);
return_expr return_expr
} }
fn logical_or(&mut self) -> ParseResult<'code, Expr<'ast>> { fn logical_or(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.logical_and()?; let lhs = self.logical_and()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_or), Some(TokenKind::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_or),
_ => Ok(lhs), _ => Ok(lhs),
}; };
@ -368,12 +380,12 @@ where
return_expr return_expr
} }
fn logical_and(&mut self) -> ParseResult<'code, Expr<'ast>> { fn logical_and(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.equality()?; let lhs = self.equality()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, logical_and), Some(TokenKind::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, logical_and),
_ => Ok(lhs), _ => Ok(lhs),
}; };
@ -381,15 +393,15 @@ where
return_expr return_expr
} }
fn equality(&mut self) -> ParseResult<'code, Expr<'ast>> { fn equality(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.comparison()?; let lhs = self.comparison()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::BangEqual) => { Some(TokenKind::BangEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison) parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison)
} }
Some(TokenType::EqualEqual) => { Some(TokenKind::EqualEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison) parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison)
} }
_ => Ok(lhs), _ => Ok(lhs),
@ -398,17 +410,17 @@ where
return_expr return_expr
} }
fn comparison(&mut self) -> ParseResult<'code, Expr<'ast>> { fn comparison(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.term()?; let lhs = self.term()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term), Some(TokenKind::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term),
Some(TokenType::GreaterEqual) => { Some(TokenKind::GreaterEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term) parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term)
} }
Some(TokenType::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term), Some(TokenKind::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term),
Some(TokenType::LessEqual) => { Some(TokenKind::LessEqual) => {
parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term) parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term)
} }
_ => Ok(lhs), _ => Ok(lhs),
@ -417,39 +429,39 @@ where
return_expr return_expr
} }
fn term(&mut self) -> ParseResult<'code, Expr<'ast>> { fn term(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.factor()?; let lhs = self.factor()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, term), Some(TokenKind::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, term),
Some(TokenType::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, term), Some(TokenKind::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, term),
_ => Ok(lhs), _ => Ok(lhs),
}; };
exit_parse!(self); exit_parse!(self);
return_expr return_expr
} }
fn factor(&mut self) -> ParseResult<'code, Expr<'ast>> { fn factor(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let lhs = self.unary()?; let lhs = self.unary()?;
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, factor), Some(TokenKind::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, factor),
Some(TokenType::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, factor), Some(TokenKind::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, factor),
Some(TokenType::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, factor), Some(TokenKind::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, factor),
_ => Ok(lhs), _ => Ok(lhs),
}; };
exit_parse!(self); exit_parse!(self);
return_expr return_expr
} }
fn unary(&mut self) -> ParseResult<'code, Expr<'ast>> { fn unary(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let return_expr = match self.peek_kind()? { let return_expr = match self.peek_kind() {
Some(TokenType::Not) => { Some(TokenKind::Not) => {
let unary_op_span = self.next()?.unwrap().span; let unary_op_span = self.next().unwrap().span;
let expr = self.call()?; let expr = self.call()?;
Ok(Expr::UnaryOp(Box::new_in( Ok(Expr::UnaryOp(Box::new_in(
UnaryOp { UnaryOp {
@ -460,8 +472,8 @@ where
self.bump, self.bump,
))) )))
} }
Some(TokenType::Minus) => { Some(TokenKind::Minus) => {
let unary_op_span = self.next()?.unwrap().span; let unary_op_span = self.next().unwrap().span;
let expr = self.call()?; let expr = self.call()?;
Ok(Expr::UnaryOp(Box::new_in( Ok(Expr::UnaryOp(Box::new_in(
UnaryOp { UnaryOp {
@ -478,17 +490,17 @@ where
return_expr return_expr
} }
fn call(&mut self) -> ParseResult<'code, Expr<'ast>> { fn call(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let mut expr = self.primary()?; let mut expr = self.primary()?;
loop { loop {
expr = match self.peek_kind()? { expr = match self.peek_kind() {
Some(TokenType::ParenO) => { Some(TokenKind::ParenO) => {
let open_span = self.expect(TokenType::ParenO)?.span; let open_span = self.expect(TokenKind::ParenO)?.span;
let args = self.parse_list(TokenType::ParenC, Self::expression)?; let args = self.parse_list(TokenKind::ParenC, Self::expression)?;
let close_span = self.expect(TokenType::ParenC)?.span; let close_span = self.expect(TokenKind::ParenC)?.span;
Expr::Call(Box::new_in( Expr::Call(Box::new_in(
Call { Call {
@ -499,8 +511,8 @@ where
self.bump, self.bump,
)) ))
} }
Some(TokenType::Dot) => { Some(TokenKind::Dot) => {
let dot_span = self.expect(TokenType::Dot)?.span; let dot_span = self.expect(TokenKind::Dot)?.span;
let field = self.ident()?; let field = self.ident()?;
Expr::Call(Box::new_in( Expr::Call(Box::new_in(
@ -521,73 +533,80 @@ where
Ok(expr) Ok(expr)
} }
fn primary(&mut self) -> ParseResult<'code, Expr<'ast>> { fn primary(&mut self) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let next = self.next()?.ok_or(ParseErr::Eof("primary"))?; let next = self.next().ok_or_else(|| CompilerError::eof("primary"))?;
let return_expr = match next.kind { let return_expr = match next.kind {
TokenType::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))), TokenKind::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))),
TokenType::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), TokenKind::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))),
TokenType::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))), TokenKind::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))),
TokenType::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))), TokenKind::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))),
TokenType::Null => Ok(Expr::Literal(Literal::Null(next.span))), TokenKind::Null => Ok(Expr::Literal(Literal::Null(next.span))),
TokenType::BraceO => self.object_literal(next.span), TokenKind::BraceO => self.object_literal(next.span),
TokenType::BracketO => self.array_literal(next.span), TokenKind::BracketO => self.array_literal(next.span),
TokenType::ParenO => { TokenKind::ParenO => {
let expr = self.expression()?; let expr = self.expression()?;
let _ = self.expect(TokenType::ParenC)?; let _ = self.expect(TokenKind::ParenC)?;
Ok(expr) Ok(expr)
} }
TokenType::Ident(name) => { TokenKind::Ident(name) => {
let name_owned = name.to_owned(); let name_owned = name.to_owned();
Ok(Expr::Ident(Ident { Ok(Expr::Ident(Ident {
sym: name_owned, sym: name_owned,
span: next.span, span: next.span,
})) }))
} }
_ => Err(ParseErr::InvalidTokenPrimary(next)), TokenKind::Error(error) => Err(error),
_ => Err(CompilerError::new(
next.span,
format!("invalid token in expression: `{:?}`", next.kind),
)),
}; };
exit_parse!(self); exit_parse!(self);
return_expr return_expr
} }
fn ident(&mut self) -> ParseResult<'code, Ident> { fn ident(&mut self) -> ParseResult<Ident> {
enter_parse!(self); enter_parse!(self);
let Token { kind, span } = self.next()?.ok_or(ParseErr::Eof("identifier"))?; let Token { kind, span } = self
.next()
.ok_or_else(|| CompilerError::eof("identifier"))?;
let return_expr = match kind { let return_expr = match kind {
TokenType::Ident(name) => { TokenKind::Ident(name) => {
let name_owned = name.to_owned(); let name_owned = name.to_owned();
Ok(Ident { Ok(Ident {
sym: name_owned, sym: name_owned,
span, span,
}) })
} }
TokenKind::Error(error) => Err(error),
_ => { _ => {
return Err(ParseErr::MismatchedKind { return Err(CompilerError::new(
expected: TokenType::Ident("<ident>"), span,
actual: Token { span, kind }, format!("expected identifier, received `{:?}`", kind),
}) ))
} }
}; };
exit_parse!(self); exit_parse!(self);
return_expr return_expr
} }
fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr<'ast>> { fn object_literal(&mut self, open_span: Span) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let close_span = self.expect(TokenType::BraceC)?.span; let close_span = self.expect(TokenKind::BraceC)?.span;
exit_parse!(self); exit_parse!(self);
Ok(Expr::Literal(Literal::Object(open_span.extend(close_span)))) Ok(Expr::Literal(Literal::Object(open_span.extend(close_span))))
} }
fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr<'ast>> { fn array_literal(&mut self, open_span: Span) -> ParseResult<Expr<'ast>> {
enter_parse!(self); enter_parse!(self);
let elements = self.parse_list(TokenType::BracketC, Self::expression)?; let elements = self.parse_list(TokenKind::BracketC, Self::expression)?;
let closing_bracket = self.expect(TokenType::BracketC)?; let closing_bracket = self.expect(TokenKind::BracketC)?;
let return_expr = Ok(Expr::Literal(Literal::Array( let return_expr = Ok(Expr::Literal(Literal::Array(
elements, elements,
@ -599,32 +618,34 @@ where
fn parse_list<T, F>( fn parse_list<T, F>(
&mut self, &mut self,
close: TokenType<'code>, close: TokenKind<'code>,
mut parser: F, mut parser: F,
) -> ParseResult<'code, Vec<'ast, T>> ) -> ParseResult<Vec<'ast, T>>
where where
F: FnMut(&mut Self) -> ParseResult<'code, T>, F: FnMut(&mut Self) -> ParseResult<T>,
{ {
enter_parse!(self); enter_parse!(self);
let mut elements = Vec::new_in(self.bump); let mut elements = Vec::new_in(self.bump);
if self.peek_kind()? == Some(&close) { if self.peek_kind() == Some(&close) {
return Ok(elements); return Ok(elements);
} }
let expr = parser(self)?; let expr = parser(self)?;
elements.push(expr); elements.push(expr);
while self while self.peek_kind().ok_or_else(|| {
.peek_kind()? CompilerError::new(
.ok_or_else(|| ParseErr::EofExpecting(close.clone()))? Span::dummy(),
!= &close format!("reached EOF expecting `{:?}`", close.clone()),
)
})? != &close
{ {
self.expect(TokenType::Comma)?; self.expect(TokenKind::Comma)?;
// trailing comma support // trailing comma support
if self.peek_kind()? == Some(&close) { if self.peek_kind() == Some(&close) {
break; break;
} }
@ -638,96 +659,46 @@ where
// token helpers // token helpers
// Advances the token stream by one token.
// Before: the stream yielded `Result`s; a lexer error was wrapped in `ParseErr::LexError`
// and end of input was reported as `Ok(None)`.
// After: the stream's own `next()` is forwarded unchanged, so `None` means end of input.
fn next(&mut self) -> ParseResult<'code, Option<Token<'code>>> { fn next(&mut self) -> Option<Token<'code>> {
match self.tokens.next() { self.tokens.next()
Some(Ok(t)) => Ok(Some(t)),
Some(Err(comp_err)) => Err(ParseErr::LexError(comp_err)),
None => Ok(None),
}
} }
// Returns a reference to the upcoming token via the stream's `peek()`.
// Before: a peeked lexer error was cloned and wrapped in `ParseErr::LexError`;
// end of input was reported as `Ok(None)`.
// After: the stream's `peek()` result is forwarded unchanged.
fn peek(&mut self) -> ParseResult<'code, Option<&Token<'code>>> { fn peek(&mut self) -> Option<&Token<'code>> {
match self.tokens.peek() { self.tokens.peek()
Some(Ok(t)) => Ok(Some(t)),
Some(Err(comp_err)) => Err(ParseErr::LexError(comp_err.clone())),
None => Ok(None),
}
} }
// Like `peek`, but projects the peeked token down to a reference to its `kind` field.
// Before: the projection happens inside the `Result<Option<_>>` returned by `peek`.
// After: `peek` returns a plain `Option`, so a single `map` suffices.
fn peek_kind(&mut self) -> ParseResult<'code, Option<&TokenType<'code>>> { fn peek_kind(&mut self) -> Option<&TokenKind<'code>> {
self.peek().map(|option| option.map(|token| &token.kind)) self.peek().map(|token| &token.kind)
} }
// Consumes the next token and returns it if its kind equals `kind`; otherwise returns an error.
// Before: a mismatch becomes `ParseErr::MismatchedKind` and end of input `ParseErr::EofExpecting`.
// After: the equality check runs first; failing that, a `TokenKind::Error` token yields the error
// it carries, any other token yields an "expected .., received .." error at the token's span,
// and end of input yields a "reached EOF expecting .." error with `Span::dummy()`.
fn expect(&mut self, kind: TokenType<'code>) -> ParseResult<'code, Token> { fn expect(&mut self, kind: TokenKind<'code>) -> ParseResult<Token> {
if let Some(token) = self.next()? { if let Some(token) = self.next() {
if token.kind == kind { if token.kind == kind {
Ok(token) Ok(token)
} else if let TokenKind::Error(err) = token.kind {
Err(err)
} else { } else {
Err(ParseErr::MismatchedKind { Err(CompilerError::new(
expected: kind, token.span,
actual: token, format!("expected `{:?}`, received `{:?}`", kind, token.kind),
}) ))
} }
} else { } else {
Err(ParseErr::EofExpecting(kind)) Err(CompilerError::new(
Span::dummy(),
format!("reached EOF expecting `{:?}`", kind),
))
} }
} }
} }
// Two unrelated definitions share these lines because the listing pairs removed and added text.
// Removed (first on each line): the `ParseErr` enum and its `From<ParseErr>` conversion into
// `CompilerError`, which derived the span, message and note from the variant.
// Added (second on each line): `CompilerError::eof`, a constructor for end-of-input errors that
// uses `Span::dummy()`, the message "reached EOF while parsing `{message}`" and no note.
#[derive(Debug)] impl CompilerError {
pub enum ParseErr<'code> { fn eof(message: &str) -> Self {
MaxDepth(Span),
BreakOutsideLoop(Span),
ReturnOutsideFunction(Span),
MismatchedKind {
expected: TokenType<'code>,
actual: Token<'code>,
},
InvalidTokenPrimary(Token<'code>),
EofExpecting(TokenType<'code>),
Eof(&'static str),
LexError(CompilerError),
}
// todo: remove this and ParseErr
impl From<ParseErr<'_>> for CompilerError {
fn from(error: ParseErr<'_>) -> Self {
Self { Self {
span: match &error { // todo: don't
ParseErr::MismatchedKind { span: Span::dummy(),
actual: Token { span, .. }, message: format!("reached EOF while parsing `{}`", message),
.. note: None,
} => *span,
ParseErr::InvalidTokenPrimary(Token { span, .. }) => *span,
// The removed conversion had no real location for the two EOF variants and used a dummy span.
ParseErr::EofExpecting(_) => Span::dummy(),
ParseErr::Eof(_) => Span::dummy(),
ParseErr::BreakOutsideLoop(span) => *span,
ParseErr::ReturnOutsideFunction(span) => *span,
ParseErr::MaxDepth(span) => *span,
ParseErr::LexError(err) => err.span,
},
message: match &error {
ParseErr::MismatchedKind { expected, actual } => {
format!("expected `{:?}`, received `{:?}`", expected, actual.kind)
}
ParseErr::InvalidTokenPrimary(token) => {
format!("invalid token in expression: `{:?}`", token.kind)
}
ParseErr::EofExpecting(token) => {
format!("reached EOF searching for `{:?}`", token)
}
ParseErr::Eof(message) => {
format!("reached EOF while parsing `{}`", message)
}
ParseErr::BreakOutsideLoop(_) => "break used outside of loop".to_string(),
ParseErr::ReturnOutsideFunction(_) => "return used outside of function".to_string(),
ParseErr::MaxDepth(_) => "reached maximal nesting depth".to_string(),
ParseErr::LexError(err) => err.message.clone(),
},
// Only a wrapped lexer error contributed a note; every other variant produced `None`.
note: match error {
ParseErr::LexError(err) => err.note.clone(),
_ => None,
},
} }
} }
} }

View file

@ -1,4 +1,4 @@
use crate::errors::{CompilerError, Span}; use crate::errors::Span;
use crate::parse::Parser; use crate::parse::Parser;
use bumpalo::Bump; use bumpalo::Bump;
use prelude::*; use prelude::*;
@ -6,9 +6,9 @@ use prelude::*;
// Convenience module that gathers the names pulled in by `use prelude::*`:
// helpers from the parent module, the `Expr`/`Stmt` AST types, every token-kind variant
// (glob import) and `Bump`. The `Token` and `TokenType` aliases pin the lexer lifetime to `'static`.
// After the commit the `TokenType` alias keeps its name but now refers to `lex::TokenKind`.
mod prelude {
pub(super) use super::{parser, test_literal_bin_op, test_number_literal, token}; pub(super) use super::{parser, test_literal_bin_op, test_number_literal, token};
pub(super) use crate::ast::{Expr, Stmt}; pub(super) use crate::ast::{Expr, Stmt};
pub(super) use crate::lex::TokenType::*; pub(super) use crate::lex::TokenKind::*;
pub type Token = crate::lex::Token<'static>; pub type Token = crate::lex::Token<'static>;
pub type TokenType = crate::lex::TokenType<'static>; pub type TokenType = crate::lex::TokenKind<'static>;
pub(super) use bumpalo::Bump; pub(super) use bumpalo::Bump;
} }
@ -22,13 +22,8 @@ fn token(kind: TokenType) -> Token {
fn parser<'ast>( fn parser<'ast>(
tokens: std::vec::Vec<Token>, tokens: std::vec::Vec<Token>,
alloc: &'ast Bump, alloc: &'ast Bump,
) -> Parser<'static, 'ast, std::vec::IntoIter<Result<Token, CompilerError>>> ) -> Parser<'static, 'ast, std::vec::IntoIter<Token>>
where { where {
let tokens = tokens
.into_iter()
.map(Ok)
.collect::<Vec<Result<Token, CompilerError>>>();
Parser { Parser {
tokens: tokens.into_iter().peekable(), tokens: tokens.into_iter().peekable(),
depth: 0, depth: 0,