diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 76864e8..a3d28ae 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -5,6 +5,60 @@ use dbg_pls::DebugPls; use crate::Spanned; +// +// --- Expr +// + +#[derive(Debug, DebugPls)] +pub enum Atom { + Ident(String), + Int(i128), + Float(f64), + String(String), + Char(u8), +} + +#[derive(Debug, DebugPls)] +pub enum UnaryOp { + AddrOf, + Deref, + Plus, + Minus, + Tilde, + Bang, +} + +#[derive(Debug, DebugPls)] +pub enum BinaryOp { + Add, + Sub, + Comma, +} + +#[derive(Debug, DebugPls)] +pub struct ExprUnary { + pub rhs: Box>, + pub op: UnaryOp, +} + +#[derive(Debug, DebugPls)] +pub struct ExprBinary { + pub lhs: Box>, + pub rhs: Box>, + pub op: BinaryOp, +} + +#[derive(Debug, DebugPls)] +pub enum Expr { + Atom(Atom), + Unary(ExprUnary), + Binary(ExprBinary), +} + +// +// --- Types and decls and garbage whatever +// + #[derive(Debug, DebugPls)] pub enum TypeSpecifier { Void, @@ -59,7 +113,7 @@ pub enum Decl { #[derive(Debug, DebugPls)] pub struct InitDecl { pub declarator: Declarator, - pub init: Option<()>, + pub init: Option, } #[derive(Debug, DebugPls)] @@ -92,7 +146,7 @@ pub struct Declarator { #[derive(Debug, DebugPls)] pub struct FunctionDef { pub declaration: Decl, - pub body: Vec<()>, + pub body: Vec, } #[derive(Debug, DebugPls)] diff --git a/parser/src/parser.rs b/parser/src/parser.rs index ef38ccd..89c3158 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -10,6 +10,8 @@ use crate::{ Span, Spanned, }; +mod expr; + #[derive(Debug)] pub struct ParserError { span: Span, diff --git a/parser/src/parser/expr.rs b/parser/src/parser/expr.rs new file mode 100644 index 0000000..fe6979b --- /dev/null +++ b/parser/src/parser/expr.rs @@ -0,0 +1,124 @@ +//! The expression parser is implemented as a pratt parser. +//! +//! For more information, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html + +use crate::{ + ast::{Atom, BinaryOp, Expr, ExprBinary, ExprUnary, UnaryOp}, + parser::{Parser, ParserError, Result}, + pre::Punctuator as P, + token::{Constant, Token as Tok}, + Span, Spanned, +}; + +impl<'src, I> Parser<'src, I> +where + I: Iterator, Span)>, +{ + pub fn expr(&mut self) -> Result> { + self.expr_bp(0) + } + + fn get_lhs(&mut self) -> Result> { + let (typ, span) = match self.peek_t()? { + (Tok::Ident(ident), span) => (Atom::Ident(ident.to_string()), span), + (Tok::StringLiteral(literal), span) => (Atom::String(literal.to_string()), span), + (Tok::Constant(Constant::Int(int)), span) => (Atom::Int(*int), span), + (Tok::Constant(Constant::Float(float)), span) => (Atom::Float(*float), span), + (Tok::Constant(Constant::Char(char)), span) => (Atom::Char(*char), span), + &(Tok::Punct(punct), span) => { + let r_bp = prefix_binding_power(&Tok::Punct(punct)); + let op = unary_op_from_token(&Tok::Punct(punct), span)?; + let rhs = self.expr_bp(r_bp)?; + + return Ok(( + Expr::Unary(ExprUnary { + rhs: Box::new(rhs), + op, + }), + span, + )); + } + (tok, span) => { + return Err(ParserError::new( + *span, + format!("expected expression, found {tok}"), + )); + } + }; + + Ok((Expr::Atom(typ), *span)) + } + + fn expr_bp(&mut self, min_bp: u8) -> Result> { + let mut lhs = self.get_lhs()?; + + loop { + let (tok, span) = match self.next_t() { + Ok(tok) => tok, + Err(_) => break, + }; + let op = binary_op_from_token(&tok, span)?; + + let (l_bp, r_bp) = infix_binding_power(&tok); + if l_bp < min_bp { + break; + } + + let rhs = self.expr_bp(r_bp)?; + + let span = lhs.1.extend(rhs.1); + + lhs = ( + Expr::Binary(ExprBinary { + lhs: Box::new(lhs), + rhs: Box::new(rhs), + op, + }), + span, + ) + } + + todo!() + } +} + +fn unary_op_from_token(tok: &Tok<'_>, span: Span) -> Result { + match tok { + Tok::Punct(P::Ampersand) => Ok(UnaryOp::AddrOf), + Tok::Punct(P::Asterisk) => Ok(UnaryOp::Deref), + Tok::Punct(P::Plus) => Ok(UnaryOp::Plus), + Tok::Punct(P::Minus) => Ok(UnaryOp::Minus), + Tok::Punct(P::Tilde) => Ok(UnaryOp::Tilde), + Tok::Punct(P::Bang) => Ok(UnaryOp::Bang), + _ => Err(ParserError::new( + span, + format!("invalid unary operation: {tok}"), + )), + } +} +fn binary_op_from_token(tok: &Tok<'_>, span: Span) -> Result { + match tok { + Tok::Punct(P::Plus) => Ok(BinaryOp::Add), + Tok::Punct(P::Minus) => Ok(BinaryOp::Sub), + _ => Err(ParserError::new( + span, + format!("invalid binary operation: {tok}"), + )), + } +} + +fn prefix_binding_power(tok: &Tok<'_>) -> u8 { + match tok { + Tok::Punct(P::Ampersand | P::Asterisk | P::Plus | P::Minus | P::Tilde | P::Bang) => 255, + _ => panic!("invalid token in expression! {tok:?}"), + } +} + +fn infix_binding_power(tok: &Tok<'_>) -> (u8, u8) { + match tok { + Tok::Punct(P::Comma) => (1, 2), + Tok::Punct(P::Plus | P::Minus) => (3, 4), + Tok::Punct(P::Asterisk | P::Slash) => (5, 6), + _ => panic!("invalid token in expression! {tok:?}"), + } +} diff --git a/parser/src/pre/lexer.rs b/parser/src/pre/lexer.rs index 5298158..6005380 100644 --- a/parser/src/pre/lexer.rs +++ b/parser/src/pre/lexer.rs @@ -55,6 +55,8 @@ pub enum Punctuator { Tilde, /// ! 🤯 Bang, + /// / + Slash, //// % Percent, /// << @@ -138,6 +140,7 @@ impl Display for Punctuator { Punctuator::Minus => f.write_str("-"), Punctuator::Tilde => f.write_str("~"), Punctuator::Bang => f.write_str("!"), + Punctuator::Slash => f.write_str("/"), Punctuator::Percent => f.write_str("%"), Punctuator::LeftLeftChevron => f.write_str("<<"), Punctuator::RightRightChevron => f.write_str(">>"), @@ -383,6 +386,7 @@ where (b'-', _, _) => break (TokP(Punctuator::Minus), start_span), (b'~', _, _) => break (TokP(Punctuator::Tilde), start_span), (b'!', _, _) => break (TokP(Punctuator::Bang), start_span), + (b'/', _, _) => break (TokP(Punctuator::Slash), start_span), (b'%', _, _) => break (TokP(Punctuator::Percent), start_span), (b'<', _, _) => break (TokP(Punctuator::LeftChevron), start_span), (b'>', _, _) => break (TokP(Punctuator::RightChevron), start_span),