diff --git a/ub_parser/src/ast.rs b/ub_parser/src/ast.rs index 1a5d7df..b02bff7 100644 --- a/ub_parser/src/ast.rs +++ b/ub_parser/src/ast.rs @@ -5,7 +5,7 @@ type Span = Range; #[derive(Debug, Clone, PartialEq)] pub struct File { pub name: PathBuf, - pub items: Vec, // todo make item + pub items: Vec, } #[derive(Debug, Clone, PartialEq)] @@ -30,14 +30,14 @@ pub enum Item { #[derive(Debug, Clone, PartialEq)] pub struct FnDecl { pub name: String, - pub params: Vec, - pub ret_ty: Ty, + pub params: Vec, + pub ret_ty: Option, pub span: Span, pub body: Vec, } #[derive(Debug, Clone, PartialEq)] -pub struct FnParam { +pub struct NameTyPair { pub name: String, pub ty: Ty, pub span: Span, @@ -46,14 +46,8 @@ pub struct FnParam { #[derive(Debug, Clone, PartialEq)] pub struct StructDecl { pub name: String, + pub fields: Vec, pub span: Span, - pub fields: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct StructField { - pub name: String, - pub ty: Ty, } #[derive(Debug, Clone, PartialEq)] diff --git a/ub_parser/src/lexer.rs b/ub_parser/src/lexer.rs index 0d9a09d..5f90832 100644 --- a/ub_parser/src/lexer.rs +++ b/ub_parser/src/lexer.rs @@ -58,6 +58,10 @@ pub enum Token<'a> { AndAnd, #[token("^")] Caret, + #[token("->")] + Arrow, + #[token(":")] + Colon, // keywords #[token("struct")] @@ -102,7 +106,7 @@ mod tests { #[test] fn punctuation() { - let tokens = lex_test("{} [] () .,; = == != >= <= < > + - * / | || & && ^"); + let tokens = lex_test("{} [] () .,; = == != >= <= < > + - * / | || & && ^ -> :"); insta::assert_debug_snapshot!(tokens); } diff --git a/ub_parser/src/parser.rs b/ub_parser/src/parser.rs index a07be8d..fb506d8 100644 --- a/ub_parser/src/parser.rs +++ b/ub_parser/src/parser.rs @@ -3,19 +3,35 @@ use std::{ops::Range, path::PathBuf}; use chumsky::{prelude::*, Stream}; use crate::{ - ast::{BinOp, BinOpKind, Call, Expr, File, Literal, Stmt}, + ast::{ + Assignment, BinOp, BinOpKind, Call, Expr, File, FnDecl, Item, Literal, NameTyPair, Stmt, + StructDecl, Ty, TyKind, VarDecl, + }, lexer::Token, }; type Error<'src> = Simple>; type Span = Range; -fn ident_parser<'src>() -> impl Parser, &'src str, Error = Error<'src>> + Clone { +fn ident_parser<'src>() -> impl Parser, String, Error = Error<'src>> + Clone { filter_map(|span, tok| match tok { - Token::Ident(ident) => Ok(ident), + Token::Ident(ident) => Ok(ident.to_owned()), _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), }) .labelled("identifier") + .boxed() +} + +fn ty_parser<'src>() -> impl Parser, Ty, Error = Error<'src>> + Clone { + filter_map(|span, token| { + let kind = match token { + Token::Ident("u64") => TyKind::U64, + _ => return Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }; + + Ok(Ty { span, kind }) + }) + .boxed() } fn expr_parser<'src>() -> impl Parser, Expr, Error = Error<'src>> + Clone { @@ -49,7 +65,8 @@ fn expr_parser<'src>() -> impl Parser, Expr, Error = Error<'src>> + .or(array) .or(expr .clone() - .delimited_by(just(Token::ParenO), just(Token::ParenC))); + .delimited_by(just(Token::ParenO), just(Token::ParenC))) + .boxed(); let call = atom .then( @@ -112,19 +129,114 @@ fn expr_parser<'src>() -> impl Parser, Expr, Error = Error<'src>> + }) }); - compare + compare.boxed() + }) +} + +fn statement_parser<'src>( + item: impl Parser, Item, Error = Error<'src>> + Clone, +) -> impl Parser, Stmt, Error = Error<'src>> + Clone { + let var_decl = ty_parser() + .then(ident_parser()) + .then_ignore(just(Token::Eq)) + .then(expr_parser()) + .map(|((ty, name), rhs)| { + Stmt::VarDecl(VarDecl { + name, + ty, + rhs: Some(rhs), + span: Default::default(), + }) + }); + + let assignment = expr_parser() + .then_ignore(just(Token::Eq)) + .then(expr_parser()) + .map(|(place, rhs)| { + Stmt::Assignment(Assignment { + place, + rhs, + span: Default::default(), + }) + }); + + var_decl + .or(assignment) + .or(expr_parser().map(|expr| Stmt::Expr(expr))) + .or(item.clone().map(|item| Stmt::Item(item))) + .then_ignore(just(Token::Semi)) +} + +fn name_ty_pair_parser<'src>() -> impl Parser, NameTyPair, Error = Error<'src>> + Clone +{ + ident_parser() + .then_ignore(just(Token::Colon)) + .then(ty_parser()) + .map_with_span(|(name, ty), span| NameTyPair { name, ty, span }) +} + +fn function_parser<'src>( + item: impl Parser, Item, Error = Error<'src>> + Clone, +) -> impl Parser, FnDecl, Error = Error<'src>> + Clone { + let name = ident_parser(); + + let params = name_ty_pair_parser() + .separated_by(just(Token::Comma)) + .allow_trailing() + .delimited_by(just(Token::ParenO), just(Token::ParenC)) + .labelled("function arguments"); + + let ret_ty = just(Token::Arrow).ignore_then(ty_parser()).or_not(); + + just(Token::Fn) + .map_with_span(|_, span| span) + .then(name) + .then(params) + .then(ret_ty) + .then( + statement_parser(item.clone()) + .repeated() + .delimited_by(just(Token::BraceO), just(Token::BraceC)), + ) + .map(|((((fn_span, name), params), ret_ty), body)| FnDecl { + name, + params, + ret_ty, + span: fn_span, + body, + }) + .labelled("function") +} + +fn struct_parser<'src>() -> impl Parser, StructDecl, Error = Error<'src>> + Clone { + let name = just(Token::Struct).ignore_then(ident_parser()); + + let fields = name_ty_pair_parser() + .separated_by(just(Token::Comma)) + .delimited_by(just(Token::BraceO), just(Token::BraceC)); + + name.then(fields).map(|(name, fields)| StructDecl { + name, + fields, + span: Default::default(), + }) +} + +fn item_parser<'src>() -> impl Parser, Item, Error = Error<'src>> + Clone { + recursive(|item| { + function_parser(item) + .map(Item::FnDecl) + .or(struct_parser().map(Item::StructDecl)) }) } fn file_parser<'src>( file_name: PathBuf, ) -> impl Parser, File, Error = Error<'src>> + Clone { - expr_parser() - .map(move |expr| File { - name: file_name.clone(), - items: vec![Stmt::Expr(expr)], - }) - .then_ignore(just(Token::Semi)) + item_parser().repeated().map(move |items| File { + name: file_name.clone(), + items, + }) } pub fn parse<'src, I>(lexer: I, len: usize, file_name: PathBuf) -> (Option, Vec>) @@ -132,7 +244,7 @@ where I: 'src, I: Iterator, Span)>, { - file_parser(file_name).parse_recovery(Stream::from_iter(len..len + 1, lexer)) + file_parser(file_name).parse_recovery_verbose(Stream::from_iter(len..len + 1, lexer)) } #[cfg(test)] @@ -156,13 +268,37 @@ mod tests { #[test] fn addition() { - let r = parse("1 + 4;"); + let r = parse("fn main() { 1 + 4; }"); insta::assert_debug_snapshot!(r); } #[test] fn expression() { - let r = parse("(4 / hallo()) + 5;"); + let r = parse("fn main() { (4 / hallo()) + 5; }"); + insta::assert_debug_snapshot!(r) + } + + #[test] + fn function() { + let r = parse("fn foo() -> u64 { 1 + 5; }"); + insta::assert_debug_snapshot!(r) + } + + #[test] + fn nested_function() { + let r = parse("fn foo() { fn foo2() {} fn foo3() {} }"); + insta::assert_debug_snapshot!(r) + } + + #[test] + fn nested_function2() { + let r = parse("fn foo() { fn foo2() {} 1 + 5; }"); + insta::assert_debug_snapshot!(r) + } + + #[test] + fn struct_() { + let r = parse("struct X { y: u64, x: u64 }"); insta::assert_debug_snapshot!(r) } } diff --git a/ub_parser/src/snapshots/ub_parser__lexer__tests__punctuation.snap b/ub_parser/src/snapshots/ub_parser__lexer__tests__punctuation.snap index ffa69d0..51f76d5 100644 --- a/ub_parser/src/snapshots/ub_parser__lexer__tests__punctuation.snap +++ b/ub_parser/src/snapshots/ub_parser__lexer__tests__punctuation.snap @@ -1,5 +1,6 @@ --- source: ub_parser/src/lexer.rs +assertion_line: 110 expression: tokens --- [ @@ -28,4 +29,6 @@ expression: tokens And, AndAnd, Caret, + Arrow, + Colon, ] diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap index 45cec1d..0f86afb 100644 --- a/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap @@ -1,5 +1,6 @@ --- source: ub_parser/src/parser.rs +assertion_line: 272 expression: r --- ( @@ -7,25 +8,35 @@ expression: r File { name: "ub_parser__parser__tests", items: [ - Expr( - BinOp( - BinOp { - kind: Add, - lhs: Literal( - Integer( - 1, - 0..1, + FnDecl( + FnDecl { + name: "main", + params: [], + ret_ty: None, + span: 0..2, + body: [ + Expr( + BinOp( + BinOp { + kind: Add, + lhs: Literal( + Integer( + 1, + 12..13, + ), + ), + rhs: Literal( + Integer( + 4, + 16..17, + ), + ), + span: 0..0, + }, ), ), - rhs: Literal( - Integer( - 4, - 4..5, - ), - ), - span: 0..0, - }, - ), + ], + }, ), ], }, diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap index 29c45e2..043341f 100644 --- a/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap @@ -1,5 +1,6 @@ --- source: ub_parser/src/parser.rs +assertion_line: 278 expression: r --- ( @@ -7,39 +8,49 @@ expression: r File { name: "ub_parser__parser__tests", items: [ - Expr( - BinOp( - BinOp { - kind: Add, - lhs: BinOp( - BinOp { - kind: Div, - lhs: Literal( - Integer( - 4, - 1..2, + FnDecl( + FnDecl { + name: "main", + params: [], + ret_ty: None, + span: 0..2, + body: [ + Expr( + BinOp( + BinOp { + kind: Add, + lhs: BinOp( + BinOp { + kind: Div, + lhs: Literal( + Integer( + 4, + 13..14, + ), + ), + rhs: Call( + Call { + callee: Name( + "hallo", + ), + args: [], + }, + ), + span: 0..0, + }, ), - ), - rhs: Call( - Call { - callee: Name( - "hallo", + rhs: Literal( + Integer( + 5, + 28..29, ), - args: [], - }, - ), - span: 0..0, - }, - ), - rhs: Literal( - Integer( - 5, - 16..17, + ), + span: 0..0, + }, ), ), - span: 0..0, - }, - ), + ], + }, ), ], }, diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__function.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__function.snap new file mode 100644 index 0000000..4b6430d --- /dev/null +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__function.snap @@ -0,0 +1,50 @@ +--- +source: ub_parser/src/parser.rs +assertion_line: 284 +expression: r +--- +( + Some( + File { + name: "ub_parser__parser__tests", + items: [ + FnDecl( + FnDecl { + name: "foo", + params: [], + ret_ty: Some( + Ty { + span: 12..15, + kind: U64, + }, + ), + span: 0..2, + body: [ + Expr( + BinOp( + BinOp { + kind: Add, + lhs: Literal( + Integer( + 1, + 18..19, + ), + ), + rhs: Literal( + Integer( + 5, + 22..23, + ), + ), + span: 0..0, + }, + ), + ), + ], + }, + ), + ], + }, + ), + [], +) diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__struct_.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__struct_.snap new file mode 100644 index 0000000..98ef9cc --- /dev/null +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__struct_.snap @@ -0,0 +1,39 @@ +--- +source: ub_parser/src/parser.rs +assertion_line: 290 +expression: r +--- +( + Some( + File { + name: "ub_parser__parser__tests", + items: [ + StructDecl( + StructDecl { + name: "X", + fields: [ + NameTyPair { + name: "y", + ty: Ty { + span: 14..17, + kind: U64, + }, + span: 11..17, + }, + NameTyPair { + name: "x", + ty: Ty { + span: 22..25, + kind: U64, + }, + span: 19..25, + }, + ], + span: 0..0, + }, + ), + ], + }, + ), + [], +)