From c5b82c4b18409c66cbc52ad8506fc9eeb23ebff0 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sun, 31 Oct 2021 13:59:47 +0100 Subject: [PATCH] stmt parse --- grammar.txt | 4 +- src/ast.rs | 51 +++- src/errors.rs | 9 + src/lex.rs | 3 + src/parse.rs | 718 ---------------------------------------------- src/parse/mod.rs | 409 ++++++++++++++++++++++++++ src/parse/test.rs | 423 +++++++++++++++++++++++++++ 7 files changed, 884 insertions(+), 733 deletions(-) delete mode 100644 src/parse.rs create mode 100644 src/parse/mod.rs create mode 100644 src/parse/test.rs diff --git a/grammar.txt b/grammar.txt index 4fff7d1..a64a563 100644 --- a/grammar.txt +++ b/grammar.txt @@ -30,9 +30,9 @@ ::= "(" { { "," } } ")" - ::= "if" { } + ::= "if" { } - ::= "else" ( | ) + ::= "else" ( | ) ::= "loop" diff --git a/src/ast.rs b/src/ast.rs index e04ca44..775e956 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -11,7 +11,10 @@ pub type Symbol = String; pub struct Program(pub Vec); #[derive(Debug, Clone, PartialEq)] -pub struct Block(pub Vec); +pub struct Block { + pub stmts: Vec, + pub span: Span, +} #[derive(Debug, Clone, PartialEq)] pub enum Stmt { @@ -19,13 +22,31 @@ pub enum Stmt { Assignment(Assignment), FnDecl(FnDecl), If(IfStmt), - Loop(Block), + Loop(Block, Span), While(WhileStmt), - Break(Break), - Return(Option), + Break(Span), + Return(Option, Span), + Block(Block), Expr(Expr), } +impl Stmt { + pub fn span(&self) -> Span { + match self { + Stmt::Declaration(decl) => decl.span, + Stmt::Assignment(assign) => assign.span, + Stmt::FnDecl(decl) => decl.span, + Stmt::If(if_stmt) => if_stmt.span, + Stmt::Loop(_, span) => *span, + Stmt::While(while_stmt) => while_stmt.span, + Stmt::Break(span) => *span, + Stmt::Return(_, span) => *span, + Stmt::Block(block) => block.span, + Stmt::Expr(expr) => expr.span(), + } + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Declaration { pub span: Span, @@ -51,15 +72,24 @@ pub struct FnDecl { #[derive(Debug, Clone, PartialEq)] pub struct IfStmt { pub span: Span, - pub condition: Expr, + pub cond: Expr, pub body: Block, - pub else_part: Box, + pub else_part: Option>, } #[derive(Debug, Clone, PartialEq)] pub enum ElsePart { - Else(Block), - ElseIf(IfStmt), + Else(Block, Span), + ElseIf(IfStmt, Span), +} + +impl ElsePart { + pub fn span(&self) -> Span { + match self { + ElsePart::Else(_, span) => *span, + ElsePart::ElseIf(_, span) => *span, + } + } } #[derive(Debug, Clone, PartialEq)] @@ -69,11 +99,6 @@ pub struct WhileStmt { pub body: Block, } -#[derive(Debug, Clone, PartialEq)] -pub struct Break { - pub span: Span, -} - #[derive(Debug, Clone, PartialEq)] pub enum Expr { Ident(Symbol, Span), diff --git a/src/errors.rs b/src/errors.rs index 243d79e..a042c38 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -47,6 +47,15 @@ mod span { } } + /// Extends the span by the second one, if it exists + /// The other one has to be after the current one, if it exists + pub fn option_extend(&self, other: Option) -> Span { + match other { + None => *self, + Some(span) => self.extend(span), + } + } + pub fn len(&self) -> usize { self.end - self.start } diff --git a/src/lex.rs b/src/lex.rs index cf48fb1..edafaee 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -39,6 +39,7 @@ pub enum TokenType<'code> { While, For, Break, + Return, True, False, Null, @@ -326,6 +327,8 @@ fn keyword_or_ident(name: &str) -> TokenType { b'w' if len == 5 && bs[1..5] == *b"hile" => TokenType::While, // break b'b' if len == 5 && bs[1..5] == *b"reak" => TokenType::Break, + // return + b'r' if len == 6 && bs[1..6] == *b"eturn" => TokenType::Return, // true b't' if len == 4 && bs[1..4] == *b"rue" => TokenType::True, // null && not diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index d61ed7c..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,718 +0,0 @@ -#![allow(dead_code)] - -use crate::ast::*; -use crate::errors::{CompilerError, Span}; -use crate::lex::{Token, TokenType}; -use std::iter::Peekable; - -pub fn parse(tokens: Vec) -> Result { - let mut parser = Parser { - tokens: tokens.into_iter().peekable(), - }; - let program = parser.program()?; - Ok(program) -} - -#[derive(Debug)] -struct Parser<'code> { - tokens: Peekable>>, -} - -type ParseResult<'code, T> = Result>; - -macro_rules! parse_bin_op { - ($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{ - let _ = $self.next(); - let rhs = $self.$function()?; - Ok(Expr::BinaryOp(Box::new(BinaryOp { - span: $lhs.span().extend(rhs.span()), - lhs: $lhs, - rhs, - kind: $kind, - }))) - }}; -} - -impl<'code> Parser<'code> { - fn program(&mut self) -> ParseResult<'code, Program> { - Ok(Program(self.statement_list()?)) - } - - fn statement_list(&mut self) -> ParseResult<'code, Vec> { - let mut stmts = Vec::new(); - loop { - if let Some(TokenType::BraceC) | None = self.peek().map(|token| &token.kind) { - let _ = self.next(); - return Ok(stmts); - } - let stmt = self.statement()?; - stmts.push(stmt); - } - } - - fn block(&mut self) -> ParseResult<'code, Block> { - Ok(Block(self.statement_list()?)) - } - - fn statement(&mut self) -> ParseResult<'code, Stmt> { - let expr = self.expression()?; - self.expect(TokenType::Semi)?; - Ok(Stmt::Expr(expr)) - } - - fn declaration(&mut self) -> ParseResult<'code, Declaration> { - todo!() - } - - fn assignment(&mut self) -> ParseResult<'code, Assignment> { - todo!() - } - - fn fn_decl(&mut self) -> ParseResult<'code, FnDecl> { - todo!() - } - - fn if_stmt(&mut self) -> ParseResult<'code, IfStmt> { - todo!() - } - - fn loop_stmt(&mut self) -> ParseResult<'code, Block> { - todo!() - } - - fn expression(&mut self) -> ParseResult<'code, Expr> { - self.logical_or() - } - - fn logical_or(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.logical_and()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_and), - _ => Ok(lhs), - } - } - - fn logical_and(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.equality()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, equality), - _ => Ok(lhs), - } - } - - fn equality(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.comparison()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::BangEqual) => { - parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison) - } - Some(TokenType::EqualEqual) => { - parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison) - } - _ => Ok(lhs), - } - } - - fn comparison(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.term()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term), - Some(TokenType::GreaterEqual) => { - parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term) - } - Some(TokenType::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term), - Some(TokenType::LessEqual) => { - parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term) - } - _ => Ok(lhs), - } - } - - fn term(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.factor()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, factor), - Some(TokenType::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, factor), - _ => Ok(lhs), - } - } - - fn factor(&mut self) -> ParseResult<'code, Expr> { - let lhs = self.unary()?; - match self.peek().map(|token| &token.kind) { - Some(TokenType::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, unary), - Some(TokenType::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, unary), - Some(TokenType::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, unary), - _ => Ok(lhs), - } - } - - fn unary(&mut self) -> ParseResult<'code, Expr> { - match self.peek().map(|token| &token.kind) { - Some(TokenType::Not) => { - let unary_op_span = self.next().unwrap().span; - let expr = self.expression()?; - Ok(Expr::UnaryOp(Box::new(UnaryOp { - span: unary_op_span.extend(expr.span()), - expr, - kind: UnaryOpKind::Not, - }))) - } - Some(TokenType::Minus) => { - let unary_op_span = self.next().unwrap().span; - let expr = self.expression()?; - Ok(Expr::UnaryOp(Box::new(UnaryOp { - span: unary_op_span.extend(expr.span()), - expr, - kind: UnaryOpKind::Neg, - }))) - } - _ => self.primary(), - } - } - - fn primary(&mut self) -> ParseResult<'code, Expr> { - let next = self.next().ok_or(ParseErr::EOF("primary"))?; - match next.kind { - TokenType::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))), - TokenType::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), - TokenType::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))), - TokenType::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))), - TokenType::Null => Ok(Expr::Literal(Literal::Null(next.span))), - TokenType::BraceO => self.object_literal(next.span), - TokenType::BracketO => self.array_literal(next.span), - TokenType::ParenO => { - let expr = self.expression()?; - let _ = self.expect(TokenType::ParenC)?; - Ok(expr) - } - TokenType::Ident(name) => { - let name_owned = name.to_owned(); - Ok(Expr::Ident(name_owned, next.span)) - } - _ => Err(ParseErr::InvalidTokenPrimary(next)), - } - } - - fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { - let close_span = self.expect(TokenType::BraceC)?.span; - Ok(Expr::Literal(Literal::Object(open_span.extend(close_span)))) - } - - fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { - let mut elements = Vec::new(); - while self - .peek() - .ok_or(ParseErr::EOFExpecting(TokenType::BracketC))? - .kind - != TokenType::BracketC - { - let expr = self.expression()?; - elements.push(expr); - self.expect(TokenType::Comma)?; - } - let closing_bracket = self.expect(TokenType::BracketC)?; - Ok(Expr::Literal(Literal::Array( - elements, - open_span.extend(closing_bracket.span), - ))) - } - - // token helpers - - #[must_use] - fn next(&mut self) -> Option> { - self.tokens.next() - } - - #[must_use] - fn peek(&mut self) -> Option<&Token<'code>> { - self.tokens.peek() - } - - fn expect(&mut self, kind: TokenType<'code>) -> ParseResult<'code, Token> { - if let Some(token) = self.next() { - if token.kind == kind { - Ok(token) - } else { - Err(ParseErr::MismatchedKind { - expected: kind, - actual: token, - }) - } - } else { - Err(ParseErr::EOFExpecting(kind)) - } - } -} - -#[derive(Debug)] -pub enum ParseErr<'code> { - MismatchedKind { - expected: TokenType<'code>, - actual: Token<'code>, - }, - InvalidTokenPrimary(Token<'code>), - EOFExpecting(TokenType<'code>), - EOF(&'static str), -} - -impl CompilerError for ParseErr<'_> { - fn span(&self) -> Span { - match self { - ParseErr::MismatchedKind { - actual: Token { span, .. }, - .. - } => *span, - ParseErr::InvalidTokenPrimary(Token { span, .. }) => *span, - ParseErr::EOFExpecting(_) => Span::dummy(), - ParseErr::EOF(_) => Span::dummy(), - } - } - - fn message(&self) -> String { - match self { - ParseErr::MismatchedKind { expected, actual } => { - format!("expected: {:?}, received: {:?}", expected, actual.kind) - } - ParseErr::InvalidTokenPrimary(token) => { - format!("invalid token in expression: {:?}", token.kind) - } - ParseErr::EOFExpecting(token) => { - format!("reached EOF searching for: {:?}", token) - } - ParseErr::EOF(message) => { - format!("reached EOF while parsing: {}", message) - } - } - } - - fn note(&self) -> Option { - None - } -} - -#[cfg(test)] -mod test { - use crate::ast::BinaryOp; - use crate::parse::Parser; - use prelude::*; - - mod prelude { - pub(super) use super::{parser, test_literal_bin_op, test_number_literal, token}; - pub(super) use crate::ast::{BinaryOp, BinaryOpKind, Expr, Literal}; - pub(super) use crate::errors::Span; - pub(super) use crate::lex::{Token, TokenType}; - } - - fn token(kind: TokenType) -> Token { - Token { - span: Span::dummy(), - kind, - } - } - - fn parser(tokens: Vec) -> Parser { - Parser { - tokens: tokens.into_iter().peekable(), - } - } - - fn test_literal_bin_op>) -> Expr>( - token_type: TokenType, - expected_op_kind: BinaryOpKind, - parser: F, - ) { - let tokens = [TokenType::Number(10.0), token_type, TokenType::Number(4.0)] - .map(token) - .into(); - let factor = parser(tokens); - assert_eq!( - Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), - rhs: Expr::Literal(Literal::Number(4.0, Span::dummy())), - kind: expected_op_kind - })), - factor - ); - } - - fn test_number_literal>) -> Expr>(parser: F) { - let tokens = [TokenType::Number(10.0)].map(token).into(); - let unary = parser(tokens); - assert_eq!(Expr::Literal(Literal::Number(10.0, Span::dummy())), unary); - } - - mod expr { - use super::prelude::*; - use crate::ast::{UnaryOp, UnaryOpKind}; - use TokenType::*; - - fn parse_expr(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.expression().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_expr); - } - - #[test] - fn add_multiply() { - let tokens = [Number(10.0), Plus, Number(20.0), Asterisk, Number(100.0)] - .map(token) - .into(); - let expr = parse_expr(tokens); - assert_eq!( - Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), - rhs: Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(20.0, Span::dummy())), - rhs: Expr::Literal(Literal::Number(100.0, Span::dummy())), - - kind: BinaryOpKind::Mul - })), - kind: BinaryOpKind::Add - })), - expr - ); - } - - #[test] - fn equal_unary() { - let tokens = [Number(10.0), EqualEqual, Minus, Number(10.0)] - .map(token) - .into(); - let expr = parse_expr(tokens); - assert_eq!( - Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), - rhs: Expr::UnaryOp(Box::new(UnaryOp { - span: Span::dummy(), - expr: Expr::Literal(Literal::Number(10.0, Span::dummy())), - kind: UnaryOpKind::Neg - })), - kind: BinaryOpKind::Equal - })), - expr - ); - } - - #[test] - fn parentheses_mul_add() { - let tokens = [ - Number(10.0), - Asterisk, - ParenO, - Number(20.0), - Plus, - Number(30.0), - ParenC, - ] - .map(token) - .into(); - let expr = parse_expr(tokens); - assert_eq!( - Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), - rhs: Expr::BinaryOp(Box::new(BinaryOp { - span: Span::dummy(), - lhs: Expr::Literal(Literal::Number(20.0, Span::dummy())), - rhs: Expr::Literal(Literal::Number(30.0, Span::dummy())), - - kind: BinaryOpKind::Add - })), - kind: BinaryOpKind::Mul - })), - expr - ); - } - } - - mod logical_or { - use super::prelude::*; - - fn parse_logical_or(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.logical_or().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_logical_or); - } - - #[test] - fn and() { - test_literal_bin_op(TokenType::Or, BinaryOpKind::Or, parse_logical_or); - } - } - - mod logical_and { - use super::prelude::*; - - fn parse_logical_and(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.logical_and().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_logical_and); - } - - #[test] - fn and() { - test_literal_bin_op(TokenType::And, BinaryOpKind::And, parse_logical_and); - } - } - - mod equality { - use super::prelude::*; - - fn parse_equality(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.equality().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_equality); - } - - #[test] - fn not_equal() { - test_literal_bin_op(TokenType::BangEqual, BinaryOpKind::NotEqual, parse_equality); - } - - #[test] - fn equal() { - test_literal_bin_op(TokenType::EqualEqual, BinaryOpKind::Equal, parse_equality); - } - } - - mod comparison { - use super::prelude::*; - - fn parse_comparison(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.comparison().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_comparison); - } - - #[test] - fn greater() { - test_literal_bin_op(TokenType::Greater, BinaryOpKind::Greater, parse_comparison); - } - - #[test] - fn greater_equal() { - test_literal_bin_op( - TokenType::GreaterEqual, - BinaryOpKind::GreaterEqual, - parse_comparison, - ); - } - - #[test] - fn less() { - test_literal_bin_op(TokenType::Less, BinaryOpKind::Less, parse_comparison); - } - - #[test] - fn less_equal() { - test_literal_bin_op( - TokenType::LessEqual, - BinaryOpKind::LessEqual, - parse_comparison, - ); - } - } - - mod term { - use super::prelude::*; - - fn parse_term(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.term().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_term); - } - - #[test] - fn add() { - test_literal_bin_op(TokenType::Plus, BinaryOpKind::Add, parse_term); - } - - #[test] - fn sub() { - test_literal_bin_op(TokenType::Minus, BinaryOpKind::Sub, parse_term); - } - } - - mod factor { - use super::prelude::*; - - fn parse_factor(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.factor().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_factor); - } - - #[test] - fn multiply() { - test_literal_bin_op(TokenType::Asterisk, BinaryOpKind::Mul, parse_factor); - } - - #[test] - fn divide() { - test_literal_bin_op(TokenType::Slash, BinaryOpKind::Div, parse_factor); - } - - #[test] - fn modulo() { - test_literal_bin_op(TokenType::Percent, BinaryOpKind::Mod, parse_factor); - } - } - - mod unary { - use super::prelude::*; - use crate::ast::{UnaryOp, UnaryOpKind}; - - fn parse_unary(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.unary().unwrap() - } - - #[test] - fn number_literal() { - test_number_literal(parse_unary); - } - - // needs expr support - - #[test] - fn not() { - let tokens = [TokenType::Not, TokenType::True].map(token).into(); - let unary = parse_unary(tokens); - assert_eq!( - Expr::UnaryOp(Box::new(UnaryOp { - span: Span::dummy(), - expr: Expr::Literal(Literal::Boolean(true, Span::dummy())), - kind: UnaryOpKind::Not - })), - unary - ); - } - - #[test] - fn neg() { - let tokens = [TokenType::Minus, TokenType::Number(10.0)] - .map(token) - .into(); - let unary = parse_unary(tokens); - assert_eq!( - Expr::UnaryOp(Box::new(UnaryOp { - span: Span::dummy(), - expr: Expr::Literal(Literal::Number(10.0, Span::dummy())), - kind: UnaryOpKind::Neg - })), - unary - ); - } - } - - mod primary { - use super::prelude::*; - - fn parse_primary(tokens: Vec) -> Expr { - let mut parser = parser(tokens); - parser.primary().unwrap() - } - - #[test] - fn ident() { - let tokens = [TokenType::Ident("tokens")].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!(Expr::Ident("tokens".to_string(), Span::dummy()), literal); - } - - #[test] - fn string() { - let tokens = [TokenType::Number(10.0)].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!(Expr::Literal(Literal::Number(10.0, Span::dummy())), literal); - } - - #[test] - fn number() { - let tokens = [TokenType::String("uwu".to_string())].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!( - Expr::Literal(Literal::String("uwu".to_string(), Span::dummy())), - literal - ); - } - - #[test] - fn empty_object() { - let tokens = [TokenType::BraceO, TokenType::BraceC].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!(Expr::Literal(Literal::Object(Span::dummy())), literal); - } - - #[test] - fn empty_array() { - let tokens = [TokenType::BracketO, TokenType::BracketC].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!( - Expr::Literal(Literal::Array(Vec::new(), Span::dummy())), - literal - ); - } - - #[test] - fn r#false() { - let tokens = [TokenType::False].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!( - Expr::Literal(Literal::Boolean(false, Span::dummy())), - literal - ); - } - - #[test] - fn r#true() { - let tokens = [TokenType::True].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!( - Expr::Literal(Literal::Boolean(true, Span::dummy())), - literal - ); - } - - #[test] - fn null() { - let tokens = [TokenType::Null].map(token).into(); - let literal = parse_primary(tokens); - assert_eq!(Expr::Literal(Literal::Null(Span::dummy())), literal); - } - } -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..02fb139 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,409 @@ +#![allow(dead_code)] + +#[cfg(test)] +mod test; + +use crate::ast::*; +use crate::errors::{CompilerError, Span}; +use crate::lex::{Token, TokenType}; +use std::iter::Peekable; + +pub fn parse(tokens: Vec) -> Result { + let mut parser = Parser { + tokens: tokens.into_iter().peekable(), + inside_fn_depth: 0, + inside_loop_depth: 0, + }; + let program = parser.program()?; + Ok(program) +} + +#[derive(Debug)] +struct Parser<'code> { + tokens: Peekable>>, + inside_fn_depth: usize, + inside_loop_depth: usize, +} + +type ParseResult<'code, T> = Result>; + +macro_rules! parse_bin_op { + ($self: ident, $lhs: ident, $kind: expr, $function: ident) => {{ + let _ = $self.next(); + let rhs = $self.$function()?; + Ok(Expr::BinaryOp(Box::new(BinaryOp { + span: $lhs.span().extend(rhs.span()), + lhs: $lhs, + rhs, + kind: $kind, + }))) + }}; +} + +impl<'code> Parser<'code> { + fn program(&mut self) -> ParseResult<'code, Program> { + Ok(Program(self.statement_list()?)) + } + + fn statement_list(&mut self) -> ParseResult<'code, Vec> { + let mut stmts = Vec::new(); + loop { + if let Some(TokenType::BraceC) | None = self.peek_kind() { + let _ = self.next(); + return Ok(stmts); + } + let stmt = self.statement()?; + stmts.push(stmt); + } + } + + fn block(&mut self) -> ParseResult<'code, Block> { + let start_span = self.expect(TokenType::BraceO)?.span; + let stmts = self.statement_list()?; + let end_span = self.expect(TokenType::BraceC)?.span; + Ok(Block { + stmts, + span: start_span.extend(end_span), + }) + } + + fn statement(&mut self) -> ParseResult<'code, Stmt> { + match self.peek_kind().ok_or(ParseErr::EOF("statement"))? { + &TokenType::Let => self.declaration(), + &TokenType::Fn => self.fn_decl(), + &TokenType::If => Ok(Stmt::If(self.if_stmt()?)), + &TokenType::Loop => self.loop_stmt(), + &TokenType::While => self.while_stmt(), + &TokenType::Break => self.break_stmt(), + &TokenType::Return => self.return_stmt(), + &TokenType::BraceO => Ok(Stmt::Block(self.block()?)), + _ => { + let expr = self.expression()?; + self.expect(TokenType::Semi)?; + Ok(Stmt::Expr(expr)) + } + } + } + + fn declaration(&mut self) -> ParseResult<'code, Stmt> { + todo!() + } + + fn assignment(&mut self) -> ParseResult<'code, Stmt> { + todo!() + } + + fn fn_decl(&mut self) -> ParseResult<'code, Stmt> { + todo!() + } + + fn if_stmt(&mut self) -> ParseResult<'code, IfStmt> { + let keyword_span = self.expect(TokenType::If)?.span; + let cond = self.expression()?; + let body = self.block()?; + + let else_part = if let Some(TokenType::Else) = self.peek_kind() { + Some(self.else_part()?) + } else { + None + }; + + Ok(IfStmt { + span: keyword_span + .extend(body.span) + .option_extend(else_part.as_ref().map(|part| part.span())), + cond, + body, + else_part: else_part.map(Box::new), + }) + } + + fn else_part(&mut self) -> ParseResult<'code, ElsePart> { + let keyword_span = self.expect(TokenType::Else)?.span; + + if let Some(TokenType::If) = self.peek_kind() { + let else_if_stmt = self.if_stmt()?; + let else_span = keyword_span.extend(else_if_stmt.span); + Ok(ElsePart::ElseIf(else_if_stmt, else_span)) + } else { + let block = self.block()?; + let else_span = keyword_span.extend(block.span); + Ok(ElsePart::Else(block, else_span)) + } + } + + fn loop_stmt(&mut self) -> ParseResult<'code, Stmt> { + let keyword_span = self.expect(TokenType::Loop)?.span; + + self.inside_loop_depth += 1; + let block = self.block()?; + self.inside_loop_depth -= 1; + + let loop_span = keyword_span.extend(block.span); + Ok(Stmt::Loop(block, keyword_span.extend(loop_span))) + } + + fn while_stmt(&mut self) -> ParseResult<'code, Stmt> { + let keyword_span = self.expect(TokenType::While)?.span; + let cond = self.expression()?; + let body = self.block()?; + Ok(Stmt::While(WhileStmt { + span: keyword_span.extend(body.span), + cond, + body, + })) + } + + fn break_stmt(&mut self) -> ParseResult<'code, Stmt> { + let keyword_span = self.expect(TokenType::Break)?.span; + let semi_span = self.expect(TokenType::Semi)?.span; + + if self.inside_loop_depth == 0 { + Err(ParseErr::BreakOutsideLoop(keyword_span.extend(semi_span))) + } else { + Ok(Stmt::Break(keyword_span.extend(semi_span))) + } + } + + fn return_stmt(&mut self) -> ParseResult<'code, Stmt> { + let keyword_span = self.expect(TokenType::Return)?.span; + + let expr = if let Some(TokenType::Semi) = self.peek_kind() { + None + } else { + Some(self.expression()?) + }; + + let semi_span = self.expect(TokenType::Semi)?.span; + + if self.inside_fn_depth == 0 { + Err(ParseErr::ReturnOutsideFunction( + keyword_span.extend(semi_span), + )) + } else { + Ok(Stmt::Return(expr, keyword_span.extend(semi_span))) + } + } + + fn expression(&mut self) -> ParseResult<'code, Expr> { + self.logical_or() + } + + fn logical_or(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.logical_and()?; + match self.peek_kind() { + Some(TokenType::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_and), + _ => Ok(lhs), + } + } + + fn logical_and(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.equality()?; + match self.peek_kind() { + Some(TokenType::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, equality), + _ => Ok(lhs), + } + } + + fn equality(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.comparison()?; + match self.peek_kind() { + Some(TokenType::BangEqual) => { + parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison) + } + Some(TokenType::EqualEqual) => { + parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison) + } + _ => Ok(lhs), + } + } + + fn comparison(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.term()?; + match self.peek_kind() { + Some(TokenType::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term), + Some(TokenType::GreaterEqual) => { + parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term) + } + Some(TokenType::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term), + Some(TokenType::LessEqual) => { + parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term) + } + _ => Ok(lhs), + } + } + + fn term(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.factor()?; + match self.peek_kind() { + Some(TokenType::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, factor), + Some(TokenType::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, factor), + _ => Ok(lhs), + } + } + + fn factor(&mut self) -> ParseResult<'code, Expr> { + let lhs = self.unary()?; + match self.peek_kind() { + Some(TokenType::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, unary), + Some(TokenType::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, unary), + Some(TokenType::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, unary), + _ => Ok(lhs), + } + } + + fn unary(&mut self) -> ParseResult<'code, Expr> { + match self.peek_kind() { + Some(TokenType::Not) => { + let unary_op_span = self.next().unwrap().span; + let expr = self.expression()?; + Ok(Expr::UnaryOp(Box::new(UnaryOp { + span: unary_op_span.extend(expr.span()), + expr, + kind: UnaryOpKind::Not, + }))) + } + Some(TokenType::Minus) => { + let unary_op_span = self.next().unwrap().span; + let expr = self.expression()?; + Ok(Expr::UnaryOp(Box::new(UnaryOp { + span: unary_op_span.extend(expr.span()), + expr, + kind: UnaryOpKind::Neg, + }))) + } + _ => self.primary(), + } + } + + fn primary(&mut self) -> ParseResult<'code, Expr> { + let next = self.next().ok_or(ParseErr::EOF("primary"))?; + match next.kind { + TokenType::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))), + TokenType::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))), + TokenType::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))), + TokenType::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))), + TokenType::Null => Ok(Expr::Literal(Literal::Null(next.span))), + TokenType::BraceO => self.object_literal(next.span), + TokenType::BracketO => self.array_literal(next.span), + TokenType::ParenO => { + let expr = self.expression()?; + let _ = self.expect(TokenType::ParenC)?; + Ok(expr) + } + TokenType::Ident(name) => { + let name_owned = name.to_owned(); + Ok(Expr::Ident(name_owned, next.span)) + } + _ => Err(ParseErr::InvalidTokenPrimary(next)), + } + } + + fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { + let close_span = self.expect(TokenType::BraceC)?.span; + Ok(Expr::Literal(Literal::Object(open_span.extend(close_span)))) + } + + fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> { + let mut elements = Vec::new(); + while self + .peek() + .ok_or(ParseErr::EOFExpecting(TokenType::BracketC))? + .kind + != TokenType::BracketC + { + let expr = self.expression()?; + elements.push(expr); + self.expect(TokenType::Comma)?; + } + let closing_bracket = self.expect(TokenType::BracketC)?; + Ok(Expr::Literal(Literal::Array( + elements, + open_span.extend(closing_bracket.span), + ))) + } + + // token helpers + + #[must_use] + fn next(&mut self) -> Option> { + self.tokens.next() + } + + #[must_use] + fn peek(&mut self) -> Option<&Token<'code>> { + self.tokens.peek() + } + + #[must_use] + fn peek_kind(&mut self) -> Option<&TokenType<'code>> { + self.peek().map(|token| &token.kind) + } + + fn expect(&mut self, kind: TokenType<'code>) -> ParseResult<'code, Token> { + if let Some(token) = self.next() { + if token.kind == kind { + Ok(token) + } else { + Err(ParseErr::MismatchedKind { + expected: kind, + actual: token, + }) + } + } else { + Err(ParseErr::EOFExpecting(kind)) + } + } +} + +#[derive(Debug)] +pub enum ParseErr<'code> { + BreakOutsideLoop(Span), + ReturnOutsideFunction(Span), + MismatchedKind { + expected: TokenType<'code>, + actual: Token<'code>, + }, + InvalidTokenPrimary(Token<'code>), + EOFExpecting(TokenType<'code>), + EOF(&'static str), +} + +impl CompilerError for ParseErr<'_> { + fn span(&self) -> Span { + match self { + ParseErr::MismatchedKind { + actual: Token { span, .. }, + .. + } => *span, + ParseErr::InvalidTokenPrimary(Token { span, .. }) => *span, + ParseErr::EOFExpecting(_) => Span::dummy(), + ParseErr::EOF(_) => Span::dummy(), + ParseErr::BreakOutsideLoop(span) => *span, + ParseErr::ReturnOutsideFunction(span) => *span, + } + } + + fn message(&self) -> String { + match self { + ParseErr::MismatchedKind { expected, actual } => { + format!("expected: {:?}, received: {:?}", expected, actual.kind) + } + ParseErr::InvalidTokenPrimary(token) => { + format!("invalid token in expression: {:?}", token.kind) + } + ParseErr::EOFExpecting(token) => { + format!("reached EOF searching for: {:?}", token) + } + ParseErr::EOF(message) => { + format!("reached EOF while parsing: {}", message) + } + ParseErr::BreakOutsideLoop(_) => "break used outside of loop".to_string(), + ParseErr::ReturnOutsideFunction(_) => "return used outside of function".to_string(), + } + } + + fn note(&self) -> Option { + None + } +} diff --git a/src/parse/test.rs b/src/parse/test.rs new file mode 100644 index 0000000..b795ff0 --- /dev/null +++ b/src/parse/test.rs @@ -0,0 +1,423 @@ +use crate::ast::BinaryOp; +use crate::parse::Parser; +use prelude::*; + +mod prelude { + pub(super) use super::{parser, test_literal_bin_op, test_number_literal, token}; + pub(super) use crate::ast::{BinaryOp, BinaryOpKind, Expr, Literal}; + pub(super) use crate::errors::Span; + pub(super) use crate::lex::{Token, TokenType}; +} + +fn token(kind: TokenType) -> Token { + Token { + span: Span::dummy(), + kind, + } +} + +fn parser(tokens: Vec) -> Parser { + Parser { + tokens: tokens.into_iter().peekable(), + inside_fn_depth: 0, + inside_loop_depth: 0, + } +} + +fn test_literal_bin_op>) -> Expr>( + token_type: TokenType, + expected_op_kind: BinaryOpKind, + parser: F, +) { + let tokens = [TokenType::Number(10.0), token_type, TokenType::Number(4.0)] + .map(token) + .into(); + let factor = parser(tokens); + assert_eq!( + Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), + rhs: Expr::Literal(Literal::Number(4.0, Span::dummy())), + kind: expected_op_kind + })), + factor + ); +} + +fn test_number_literal>) -> Expr>(parser: F) { + let tokens = [TokenType::Number(10.0)].map(token).into(); + let unary = parser(tokens); + assert_eq!(Expr::Literal(Literal::Number(10.0, Span::dummy())), unary); +} + +mod expr { + use super::prelude::*; + use crate::ast::{UnaryOp, UnaryOpKind}; + use TokenType::*; + + fn parse_expr(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.expression().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_expr); + } + + #[test] + fn add_multiply() { + let tokens = [Number(10.0), Plus, Number(20.0), Asterisk, Number(100.0)] + .map(token) + .into(); + let expr = parse_expr(tokens); + assert_eq!( + Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), + rhs: Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(20.0, Span::dummy())), + rhs: Expr::Literal(Literal::Number(100.0, Span::dummy())), + + kind: BinaryOpKind::Mul + })), + kind: BinaryOpKind::Add + })), + expr + ); + } + + #[test] + fn equal_unary() { + let tokens = [Number(10.0), EqualEqual, Minus, Number(10.0)] + .map(token) + .into(); + let expr = parse_expr(tokens); + assert_eq!( + Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), + rhs: Expr::UnaryOp(Box::new(UnaryOp { + span: Span::dummy(), + expr: Expr::Literal(Literal::Number(10.0, Span::dummy())), + kind: UnaryOpKind::Neg + })), + kind: BinaryOpKind::Equal + })), + expr + ); + } + + #[test] + fn parentheses_mul_add() { + let tokens = [ + Number(10.0), + Asterisk, + ParenO, + Number(20.0), + Plus, + Number(30.0), + ParenC, + ] + .map(token) + .into(); + let expr = parse_expr(tokens); + assert_eq!( + Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(10.0, Span::dummy())), + rhs: Expr::BinaryOp(Box::new(BinaryOp { + span: Span::dummy(), + lhs: Expr::Literal(Literal::Number(20.0, Span::dummy())), + rhs: Expr::Literal(Literal::Number(30.0, Span::dummy())), + + kind: BinaryOpKind::Add + })), + kind: BinaryOpKind::Mul + })), + expr + ); + } +} + +mod logical_or { + use super::prelude::*; + + fn parse_logical_or(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.logical_or().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_logical_or); + } + + #[test] + fn and() { + test_literal_bin_op(TokenType::Or, BinaryOpKind::Or, parse_logical_or); + } +} + +mod logical_and { + use super::prelude::*; + + fn parse_logical_and(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.logical_and().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_logical_and); + } + + #[test] + fn and() { + test_literal_bin_op(TokenType::And, BinaryOpKind::And, parse_logical_and); + } +} + +mod equality { + use super::prelude::*; + + fn parse_equality(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.equality().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_equality); + } + + #[test] + fn not_equal() { + test_literal_bin_op(TokenType::BangEqual, BinaryOpKind::NotEqual, parse_equality); + } + + #[test] + fn equal() { + test_literal_bin_op(TokenType::EqualEqual, BinaryOpKind::Equal, parse_equality); + } +} + +mod comparison { + use super::prelude::*; + + fn parse_comparison(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.comparison().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_comparison); + } + + #[test] + fn greater() { + test_literal_bin_op(TokenType::Greater, BinaryOpKind::Greater, parse_comparison); + } + + #[test] + fn greater_equal() { + test_literal_bin_op( + TokenType::GreaterEqual, + BinaryOpKind::GreaterEqual, + parse_comparison, + ); + } + + #[test] + fn less() { + test_literal_bin_op(TokenType::Less, BinaryOpKind::Less, parse_comparison); + } + + #[test] + fn less_equal() { + test_literal_bin_op( + TokenType::LessEqual, + BinaryOpKind::LessEqual, + parse_comparison, + ); + } +} + +mod term { + use super::prelude::*; + + fn parse_term(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.term().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_term); + } + + #[test] + fn add() { + test_literal_bin_op(TokenType::Plus, BinaryOpKind::Add, parse_term); + } + + #[test] + fn sub() { + test_literal_bin_op(TokenType::Minus, BinaryOpKind::Sub, parse_term); + } +} + +mod factor { + use super::prelude::*; + + fn parse_factor(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.factor().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_factor); + } + + #[test] + fn multiply() { + test_literal_bin_op(TokenType::Asterisk, BinaryOpKind::Mul, parse_factor); + } + + #[test] + fn divide() { + test_literal_bin_op(TokenType::Slash, BinaryOpKind::Div, parse_factor); + } + + #[test] + fn modulo() { + test_literal_bin_op(TokenType::Percent, BinaryOpKind::Mod, parse_factor); + } +} + +mod unary { + use super::prelude::*; + use crate::ast::{UnaryOp, UnaryOpKind}; + + fn parse_unary(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.unary().unwrap() + } + + #[test] + fn number_literal() { + test_number_literal(parse_unary); + } + + // needs expr support + + #[test] + fn not() { + let tokens = [TokenType::Not, TokenType::True].map(token).into(); + let unary = parse_unary(tokens); + assert_eq!( + Expr::UnaryOp(Box::new(UnaryOp { + span: Span::dummy(), + expr: Expr::Literal(Literal::Boolean(true, Span::dummy())), + kind: UnaryOpKind::Not + })), + unary + ); + } + + #[test] + fn neg() { + let tokens = [TokenType::Minus, TokenType::Number(10.0)] + .map(token) + .into(); + let unary = parse_unary(tokens); + assert_eq!( + Expr::UnaryOp(Box::new(UnaryOp { + span: Span::dummy(), + expr: Expr::Literal(Literal::Number(10.0, Span::dummy())), + kind: UnaryOpKind::Neg + })), + unary + ); + } +} + +mod primary { + use super::prelude::*; + + fn parse_primary(tokens: Vec) -> Expr { + let mut parser = parser(tokens); + parser.primary().unwrap() + } + + #[test] + fn ident() { + let tokens = [TokenType::Ident("tokens")].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!(Expr::Ident("tokens".to_string(), Span::dummy()), literal); + } + + #[test] + fn string() { + let tokens = [TokenType::Number(10.0)].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!(Expr::Literal(Literal::Number(10.0, Span::dummy())), literal); + } + + #[test] + fn number() { + let tokens = [TokenType::String("uwu".to_string())].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!( + Expr::Literal(Literal::String("uwu".to_string(), Span::dummy())), + literal + ); + } + + #[test] + fn empty_object() { + let tokens = [TokenType::BraceO, TokenType::BraceC].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!(Expr::Literal(Literal::Object(Span::dummy())), literal); + } + + #[test] + fn empty_array() { + let tokens = [TokenType::BracketO, TokenType::BracketC].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!( + Expr::Literal(Literal::Array(Vec::new(), Span::dummy())), + literal + ); + } + + #[test] + fn r#false() { + let tokens = [TokenType::False].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!( + Expr::Literal(Literal::Boolean(false, Span::dummy())), + literal + ); + } + + #[test] + fn r#true() { + let tokens = [TokenType::True].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!( + Expr::Literal(Literal::Boolean(true, Span::dummy())), + literal + ); + } + + #[test] + fn null() { + let tokens = [TokenType::Null].map(token).into(); + let literal = parse_primary(tokens); + assert_eq!(Expr::Literal(Literal::Null(Span::dummy())), literal); + } +}