mirror of
https://github.com/Noratrieb/dilaria.git
synced 2026-01-14 17:35:03 +01:00
486 lines
15 KiB
Rust
486 lines
15 KiB
Rust
#[cfg(test)]
|
|
mod test;
|
|
|
|
use crate::ast::*;
|
|
use crate::errors::{CompilerError, Span};
|
|
use crate::lex::{Token, TokenType};
|
|
use std::iter::Peekable;
|
|
|
|
pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseErr> {
|
|
let mut parser = Parser {
|
|
tokens: tokens.into_iter().peekable(),
|
|
inside_fn_depth: 0,
|
|
inside_loop_depth: 0,
|
|
};
|
|
let program = parser.program()?;
|
|
Ok(program)
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct Parser<'code> {
|
|
tokens: Peekable<std::vec::IntoIter<Token<'code>>>,
|
|
inside_fn_depth: usize,
|
|
inside_loop_depth: usize,
|
|
}
|
|
|
|
/// Result type returned by the parser rules: the parsed value or a [`ParseErr`].
type ParseResult<'code, T> = Result<T, ParseErr<'code>>;
|
|
|
|
/// Parses the right-hand side of a binary operation and builds the node.
///
/// The operator is expected to be the next token: it is consumed and discarded,
/// then the parser method named by `$rhs_rule` is called on `$parser` to parse
/// the right operand (its error, if any, is propagated with `?`). Expands to
/// `Ok(Expr::BinaryOp(..))` whose span extends from `$left` to the right operand.
macro_rules! parse_bin_op {
    ($parser:ident, $left:ident, $op_kind:expr, $rhs_rule:ident) => {{
        let _ = $parser.next();
        let right = $parser.$rhs_rule()?;
        // The span must be computed before `$left` is moved into the node.
        let span = $left.span().extend(right.span());
        Ok(Expr::BinaryOp(Box::new(BinaryOp {
            span,
            lhs: $left,
            rhs: right,
            kind: $op_kind,
        })))
    }};
}
|
|
|
|
impl<'code> Parser<'code> {
|
|
fn program(&mut self) -> ParseResult<'code, Program> {
|
|
Ok(Program(self.statement_list()?))
|
|
}
|
|
|
|
fn statement_list(&mut self) -> ParseResult<'code, Vec<Stmt>> {
|
|
let mut stmts = Vec::new();
|
|
loop {
|
|
if let Some(TokenType::BraceC) | None = self.peek_kind() {
|
|
return Ok(stmts);
|
|
}
|
|
let stmt = self.statement()?;
|
|
stmts.push(stmt);
|
|
}
|
|
}
|
|
|
|
fn block(&mut self) -> ParseResult<'code, Block> {
|
|
let start_span = self.expect(TokenType::BraceO)?.span;
|
|
let stmts = self.statement_list()?;
|
|
let end_span = self.expect(TokenType::BraceC)?.span;
|
|
Ok(Block {
|
|
stmts,
|
|
span: start_span.extend(end_span),
|
|
})
|
|
}
|
|
|
|
fn statement(&mut self) -> ParseResult<'code, Stmt> {
|
|
match *self.peek_kind().ok_or(ParseErr::Eof("statement"))? {
|
|
TokenType::Let => self.declaration(),
|
|
TokenType::Fn => self.fn_decl(),
|
|
TokenType::If => Ok(Stmt::If(self.if_stmt()?)),
|
|
TokenType::Loop => self.loop_stmt(),
|
|
TokenType::While => self.while_stmt(),
|
|
TokenType::Break => self.break_stmt(),
|
|
TokenType::Return => self.return_stmt(),
|
|
TokenType::BraceO => Ok(Stmt::Block(self.block()?)),
|
|
_ => {
|
|
let expr = self.expression()?;
|
|
self.expect(TokenType::Semi)?;
|
|
Ok(Stmt::Expr(expr))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn declaration(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::Let)?.span;
|
|
let name = self.ident()?;
|
|
self.expect(TokenType::Equal)?;
|
|
let init = self.expression()?;
|
|
self.expect(TokenType::Semi)?;
|
|
Ok(Stmt::Declaration(Declaration {
|
|
span: keyword_span.extend(init.span()),
|
|
name,
|
|
init,
|
|
}))
|
|
}
|
|
|
|
/// Placeholder for the assignment rule; it is not implemented yet.
///
/// # Panics
/// Always panics via `todo!`.
fn assignment(&mut self) -> ParseResult<'code, Stmt> {
    todo!("oh god no")
}
|
|
|
|
fn fn_decl(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::Fn)?.span;
|
|
let name = self.ident()?;
|
|
let args = self.fn_args()?;
|
|
|
|
self.inside_fn_depth += 1;
|
|
let body = self.block()?;
|
|
self.inside_fn_depth -= 1;
|
|
|
|
Ok(Stmt::FnDecl(FnDecl {
|
|
span: keyword_span.extend(body.span),
|
|
name,
|
|
params: args,
|
|
body,
|
|
}))
|
|
}
|
|
|
|
fn fn_args(&mut self) -> ParseResult<'code, Vec<Ident>> {
|
|
self.expect(TokenType::ParenO)?;
|
|
let params = self.parse_list(TokenType::ParenC, Self::ident)?;
|
|
self.expect(TokenType::ParenC)?;
|
|
Ok(params)
|
|
}
|
|
|
|
fn if_stmt(&mut self) -> ParseResult<'code, IfStmt> {
|
|
let keyword_span = self.expect(TokenType::If)?.span;
|
|
let cond = self.expression()?;
|
|
let body = self.block()?;
|
|
|
|
let else_part = if let Some(TokenType::Else) = self.peek_kind() {
|
|
Some(self.else_part()?)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
Ok(IfStmt {
|
|
span: keyword_span
|
|
.extend(body.span)
|
|
.option_extend(else_part.as_ref().map(|part| part.span())),
|
|
cond,
|
|
body,
|
|
else_part: else_part.map(Box::new),
|
|
})
|
|
}
|
|
|
|
fn else_part(&mut self) -> ParseResult<'code, ElsePart> {
|
|
let keyword_span = self.expect(TokenType::Else)?.span;
|
|
|
|
if let Some(TokenType::If) = self.peek_kind() {
|
|
let else_if_stmt = self.if_stmt()?;
|
|
let else_span = keyword_span.extend(else_if_stmt.span);
|
|
Ok(ElsePart::ElseIf(else_if_stmt, else_span))
|
|
} else {
|
|
let block = self.block()?;
|
|
let else_span = keyword_span.extend(block.span);
|
|
Ok(ElsePart::Else(block, else_span))
|
|
}
|
|
}
|
|
|
|
fn loop_stmt(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::Loop)?.span;
|
|
|
|
self.inside_loop_depth += 1;
|
|
let block = self.block()?;
|
|
self.inside_loop_depth -= 1;
|
|
|
|
let loop_span = keyword_span.extend(block.span);
|
|
Ok(Stmt::Loop(block, keyword_span.extend(loop_span)))
|
|
}
|
|
|
|
fn while_stmt(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::While)?.span;
|
|
let cond = self.expression()?;
|
|
|
|
self.inside_loop_depth += 1;
|
|
let body = self.block()?;
|
|
self.inside_loop_depth -= 1;
|
|
|
|
Ok(Stmt::While(WhileStmt {
|
|
span: keyword_span.extend(body.span),
|
|
cond,
|
|
body,
|
|
}))
|
|
}
|
|
|
|
fn break_stmt(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::Break)?.span;
|
|
let semi_span = self.expect(TokenType::Semi)?.span;
|
|
|
|
if self.inside_loop_depth == 0 {
|
|
Err(ParseErr::BreakOutsideLoop(keyword_span.extend(semi_span)))
|
|
} else {
|
|
Ok(Stmt::Break(keyword_span.extend(semi_span)))
|
|
}
|
|
}
|
|
|
|
fn return_stmt(&mut self) -> ParseResult<'code, Stmt> {
|
|
let keyword_span = self.expect(TokenType::Return)?.span;
|
|
|
|
let expr = if let Some(TokenType::Semi) = self.peek_kind() {
|
|
None
|
|
} else {
|
|
Some(self.expression()?)
|
|
};
|
|
|
|
let semi_span = self.expect(TokenType::Semi)?.span;
|
|
|
|
if self.inside_fn_depth == 0 {
|
|
Err(ParseErr::ReturnOutsideFunction(
|
|
keyword_span.extend(semi_span),
|
|
))
|
|
} else {
|
|
Ok(Stmt::Return(expr, keyword_span.extend(semi_span)))
|
|
}
|
|
}
|
|
|
|
/// Parses an expression, starting at the `logical_or` rule.
fn expression(&mut self) -> ParseResult<'code, Expr> {
    self.logical_or()
}
|
|
|
|
fn logical_or(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.logical_and()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::Or) => parse_bin_op!(self, lhs, BinaryOpKind::Or, logical_or),
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn logical_and(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.equality()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::And) => parse_bin_op!(self, lhs, BinaryOpKind::And, logical_and),
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn equality(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.comparison()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::BangEqual) => {
|
|
parse_bin_op!(self, lhs, BinaryOpKind::NotEqual, comparison)
|
|
}
|
|
Some(TokenType::EqualEqual) => {
|
|
parse_bin_op!(self, lhs, BinaryOpKind::Equal, comparison)
|
|
}
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn comparison(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.term()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::Greater) => parse_bin_op!(self, lhs, BinaryOpKind::Greater, term),
|
|
Some(TokenType::GreaterEqual) => {
|
|
parse_bin_op!(self, lhs, BinaryOpKind::GreaterEqual, term)
|
|
}
|
|
Some(TokenType::Less) => parse_bin_op!(self, lhs, BinaryOpKind::Less, term),
|
|
Some(TokenType::LessEqual) => {
|
|
parse_bin_op!(self, lhs, BinaryOpKind::LessEqual, term)
|
|
}
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn term(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.factor()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::Plus) => parse_bin_op!(self, lhs, BinaryOpKind::Add, term),
|
|
Some(TokenType::Minus) => parse_bin_op!(self, lhs, BinaryOpKind::Sub, term),
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn factor(&mut self) -> ParseResult<'code, Expr> {
|
|
let lhs = self.unary()?;
|
|
match self.peek_kind() {
|
|
Some(TokenType::Asterisk) => parse_bin_op!(self, lhs, BinaryOpKind::Mul, factor),
|
|
Some(TokenType::Slash) => parse_bin_op!(self, lhs, BinaryOpKind::Div, factor),
|
|
Some(TokenType::Percent) => parse_bin_op!(self, lhs, BinaryOpKind::Mod, factor),
|
|
_ => Ok(lhs),
|
|
}
|
|
}
|
|
|
|
fn unary(&mut self) -> ParseResult<'code, Expr> {
|
|
match self.peek_kind() {
|
|
Some(TokenType::Not) => {
|
|
let unary_op_span = self.next().unwrap().span;
|
|
let expr = self.unary()?;
|
|
Ok(Expr::UnaryOp(Box::new(UnaryOp {
|
|
span: unary_op_span.extend(expr.span()),
|
|
expr,
|
|
kind: UnaryOpKind::Not,
|
|
})))
|
|
}
|
|
Some(TokenType::Minus) => {
|
|
let unary_op_span = self.next().unwrap().span;
|
|
let expr = self.unary()?;
|
|
Ok(Expr::UnaryOp(Box::new(UnaryOp {
|
|
span: unary_op_span.extend(expr.span()),
|
|
expr,
|
|
kind: UnaryOpKind::Neg,
|
|
})))
|
|
}
|
|
_ => self.primary(),
|
|
}
|
|
}
|
|
|
|
fn primary<'parser>(&'parser mut self) -> ParseResult<'code, Expr> {
|
|
let next = self.next().ok_or(ParseErr::Eof("primary"))?;
|
|
match next.kind {
|
|
TokenType::String(literal) => Ok(Expr::Literal(Literal::String(literal, next.span))),
|
|
TokenType::Number(literal) => Ok(Expr::Literal(Literal::Number(literal, next.span))),
|
|
TokenType::False => Ok(Expr::Literal(Literal::Boolean(false, next.span))),
|
|
TokenType::True => Ok(Expr::Literal(Literal::Boolean(true, next.span))),
|
|
TokenType::Null => Ok(Expr::Literal(Literal::Null(next.span))),
|
|
TokenType::BraceO => self.object_literal(next.span),
|
|
TokenType::BracketO => self.array_literal(next.span),
|
|
TokenType::ParenO => {
|
|
let expr = self.expression()?;
|
|
let _ = self.expect(TokenType::ParenC)?;
|
|
Ok(expr)
|
|
}
|
|
TokenType::Ident(name) => {
|
|
let name_owned = name.to_owned();
|
|
Ok(Expr::Ident(Ident {
|
|
name: name_owned,
|
|
span: next.span,
|
|
}))
|
|
}
|
|
_ => Err(ParseErr::InvalidTokenPrimary(next)),
|
|
}
|
|
}
|
|
|
|
fn ident(&mut self) -> ParseResult<'code, Ident> {
|
|
let Token { kind, span } = self.next().ok_or(ParseErr::Eof("identifier"))?;
|
|
match kind {
|
|
TokenType::Ident(name) => {
|
|
let name_owned = name.to_owned();
|
|
Ok(Ident {
|
|
name: name_owned,
|
|
span,
|
|
})
|
|
}
|
|
_ => {
|
|
return Err(ParseErr::MismatchedKind {
|
|
expected: TokenType::Ident("<ident>"),
|
|
actual: Token { span, kind },
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
fn object_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> {
|
|
let close_span = self.expect(TokenType::BraceC)?.span;
|
|
Ok(Expr::Literal(Literal::Object(open_span.extend(close_span))))
|
|
}
|
|
|
|
fn array_literal(&mut self, open_span: Span) -> ParseResult<'code, Expr> {
|
|
let elements = self.parse_list(TokenType::BracketC, Self::expression)?;
|
|
let closing_bracket = self.expect(TokenType::BracketC)?;
|
|
Ok(Expr::Literal(Literal::Array(
|
|
elements,
|
|
open_span.extend(closing_bracket.span),
|
|
)))
|
|
}
|
|
|
|
fn parse_list<T, F>(
|
|
&mut self,
|
|
close: TokenType<'code>,
|
|
mut parser: F,
|
|
) -> ParseResult<'code, Vec<T>>
|
|
where
|
|
F: FnMut(&mut Self) -> ParseResult<'code, T>,
|
|
{
|
|
let mut elements = Vec::new();
|
|
|
|
if self.peek_kind() == Some(&close) {
|
|
return Ok(elements);
|
|
}
|
|
|
|
let expr = parser(self)?;
|
|
elements.push(expr);
|
|
|
|
while self
|
|
.peek_kind()
|
|
.ok_or_else(|| ParseErr::EofExpecting(close.clone()))?
|
|
!= &close
|
|
{
|
|
self.expect(TokenType::Comma)?;
|
|
|
|
// trailing comma support
|
|
if self.peek_kind() == Some(&close) {
|
|
break;
|
|
}
|
|
|
|
let expr = parser(self)?;
|
|
elements.push(expr);
|
|
}
|
|
Ok(elements)
|
|
}
|
|
|
|
// token helpers
|
|
|
|
#[must_use]
|
|
fn next(&mut self) -> Option<Token<'code>> {
|
|
self.tokens.next()
|
|
}
|
|
|
|
#[must_use]
|
|
fn peek(&mut self) -> Option<&Token<'code>> {
|
|
self.tokens.peek()
|
|
}
|
|
|
|
#[must_use]
|
|
fn peek_kind(&mut self) -> Option<&TokenType<'code>> {
|
|
self.peek().map(|token| &token.kind)
|
|
}
|
|
|
|
fn expect(&mut self, kind: TokenType<'code>) -> ParseResult<'code, Token> {
|
|
if let Some(token) = self.next() {
|
|
if token.kind == kind {
|
|
Ok(token)
|
|
} else {
|
|
Err(ParseErr::MismatchedKind {
|
|
expected: kind,
|
|
actual: token,
|
|
})
|
|
}
|
|
} else {
|
|
Err(ParseErr::EofExpecting(kind))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum ParseErr<'code> {
|
|
BreakOutsideLoop(Span),
|
|
ReturnOutsideFunction(Span),
|
|
MismatchedKind {
|
|
expected: TokenType<'code>,
|
|
actual: Token<'code>,
|
|
},
|
|
InvalidTokenPrimary(Token<'code>),
|
|
EofExpecting(TokenType<'code>),
|
|
Eof(&'static str),
|
|
}
|
|
|
|
impl CompilerError for ParseErr<'_> {
|
|
fn span(&self) -> Span {
|
|
match self {
|
|
ParseErr::MismatchedKind {
|
|
actual: Token { span, .. },
|
|
..
|
|
} => *span,
|
|
ParseErr::InvalidTokenPrimary(Token { span, .. }) => *span,
|
|
ParseErr::EofExpecting(_) => Span::dummy(),
|
|
ParseErr::Eof(_) => Span::dummy(),
|
|
ParseErr::BreakOutsideLoop(span) => *span,
|
|
ParseErr::ReturnOutsideFunction(span) => *span,
|
|
}
|
|
}
|
|
|
|
fn message(&self) -> String {
|
|
match self {
|
|
ParseErr::MismatchedKind { expected, actual } => {
|
|
format!("expected `{:?}`, received `{:?}`", expected, actual.kind)
|
|
}
|
|
ParseErr::InvalidTokenPrimary(token) => {
|
|
format!("invalid token in expression: `{:?}`", token.kind)
|
|
}
|
|
ParseErr::EofExpecting(token) => {
|
|
format!("reached EOF searching for `{:?}`", token)
|
|
}
|
|
ParseErr::Eof(message) => {
|
|
format!("reached EOF while parsing `{}`", message)
|
|
}
|
|
ParseErr::BreakOutsideLoop(_) => "break used outside of loop".to_string(),
|
|
ParseErr::ReturnOutsideFunction(_) => "return used outside of function".to_string(),
|
|
}
|
|
}
|
|
|
|
fn note(&self) -> Option<String> {
|
|
None
|
|
}
|
|
}
|