diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..4d7dd9e --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,3 @@ +imports_granularity = "Crate" +newline_style = "Unix" +group_imports = "StdExternalCrate" \ No newline at end of file diff --git a/ub_parser/src/ast.rs b/ub_parser/src/ast.rs index 600116a..1a5d7df 100644 --- a/ub_parser/src/ast.rs +++ b/ub_parser/src/ast.rs @@ -1,16 +1,17 @@ -use crate::span::Span; -use std::path::PathBuf; +use std::{ops::Range, path::PathBuf}; + +type Span = Range; #[derive(Debug, Clone, PartialEq)] pub struct File { - name: PathBuf, - items: Vec, + pub name: PathBuf, + pub items: Vec, // todo make item } #[derive(Debug, Clone, PartialEq)] pub struct Ty { - span: Span, - kind: TyKind, + pub span: Span, + pub kind: TyKind, } #[derive(Debug, Clone, PartialEq)] @@ -28,31 +29,31 @@ pub enum Item { #[derive(Debug, Clone, PartialEq)] pub struct FnDecl { - name: String, - params: Vec, - ret_ty: Ty, - span: Span, - body: Vec, + pub name: String, + pub params: Vec, + pub ret_ty: Ty, + pub span: Span, + pub body: Vec, } #[derive(Debug, Clone, PartialEq)] pub struct FnParam { - name: String, - ty: Ty, - span: Span, + pub name: String, + pub ty: Ty, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub struct StructDecl { - name: String, - span: Span, - fields: Vec, + pub name: String, + pub span: Span, + pub fields: Vec, } #[derive(Debug, Clone, PartialEq)] pub struct StructField { - name: String, - ty: Ty, + pub name: String, + pub ty: Ty, } #[derive(Debug, Clone, PartialEq)] @@ -68,25 +69,25 @@ pub enum Stmt { #[derive(Debug, Clone, PartialEq)] pub struct VarDecl { - name: String, - ty: Ty, - rhs: Option, - span: Span, + pub name: String, + pub ty: Ty, + pub rhs: Option, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub struct Assignment { - place: Place, - rhs: Expr, - span: Span, + pub place: Expr, + pub rhs: Expr, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub struct IfStmt { - cond: Expr, - body: Vec, - else_part: Option, - span: Span, + pub cond: Expr, + pub body: Vec, + pub else_part: Option, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] @@ -97,35 +98,44 @@ pub enum ElsePart { #[derive(Debug, Clone, PartialEq)] pub struct WhileStmt { - cond: Expr, - body: Vec, - span: Span, + pub cond: Expr, + pub body: Vec, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub struct LoopStmt { - body: Vec, - span: Span, + pub body: Vec, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub enum Expr { BinOp(BinOp), - Place(Place), + FieldAccess(FieldAccess), Call(Call), Deref(Box), + Literal(Literal), + Name(String), + Array(Vec), } #[derive(Debug, Clone, PartialEq)] pub struct BinOp { - kind: BinOpKind, - lhs: Box, - rhs: Box, - span: Span, + pub kind: BinOpKind, + pub lhs: Box, + pub rhs: Box, + pub span: Span, } #[derive(Debug, Clone, PartialEq)] pub enum BinOpKind { + Eq, + Neq, + Gt, + Lt, + GtEq, + LtEq, Add, Sub, Mul, @@ -140,20 +150,20 @@ pub enum BinOpKind { Xor, } -#[derive(Debug, Clone, PartialEq)] -pub enum Place { - FieldAccess(FieldAccess), - Name(String), -} - #[derive(Debug, Clone, PartialEq)] pub struct FieldAccess { - expr: Box, - field_name: String, + pub expr: Box, + pub field_name: String, } #[derive(Debug, Clone, PartialEq)] pub struct Call { - callee: Box, - args: Vec, + pub callee: Box, + pub args: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + String(String, Span), + Integer(u64, Span), } diff --git a/ub_parser/src/lexer.rs b/ub_parser/src/lexer.rs index fa4b887..0d9a09d 100644 --- a/ub_parser/src/lexer.rs +++ b/ub_parser/src/lexer.rs @@ -1,6 +1,6 @@ use logos::Logos; -#[derive(Logos, Debug, PartialEq)] +#[derive(Logos, Debug, Clone, Hash, PartialEq, Eq)] pub enum Token<'a> { #[regex("//[^\n]*", logos::skip)] Comment, diff --git a/ub_parser/src/lib.rs b/ub_parser/src/lib.rs index 3bd7707..f70fdae 100644 --- a/ub_parser/src/lib.rs +++ b/ub_parser/src/lib.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; mod ast; mod lexer; -mod span; +mod parser; pub fn parse(_str: &str, _file_name: PathBuf) -> Result { todo!() diff --git a/ub_parser/src/parser.rs b/ub_parser/src/parser.rs new file mode 100644 index 0000000..a07be8d --- /dev/null +++ b/ub_parser/src/parser.rs @@ -0,0 +1,168 @@ +use std::{ops::Range, path::PathBuf}; + +use chumsky::{prelude::*, Stream}; + +use crate::{ + ast::{BinOp, BinOpKind, Call, Expr, File, Literal, Stmt}, + lexer::Token, +}; + +type Error<'src> = Simple>; +type Span = Range; + +fn ident_parser<'src>() -> impl Parser, &'src str, Error = Error<'src>> + Clone { + filter_map(|span, tok| match tok { + Token::Ident(ident) => Ok(ident), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), + }) + .labelled("identifier") +} + +fn expr_parser<'src>() -> impl Parser, Expr, Error = Error<'src>> + Clone { + recursive(|expr| { + let literal = filter_map(|span, token| match token { + Token::String(str) => Ok(Expr::Literal(Literal::String( + str[1..str.len() - 2].to_owned(), + span, + ))), + // todo lol unwrap + Token::Integer(int) => Ok(Expr::Literal(Literal::Integer(int.parse().unwrap(), span))), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))), + }) + .labelled("literal"); + + // A list of expressions + let items = expr + .clone() + .chain(just(Token::Comma).ignore_then(expr.clone()).repeated()) + .then_ignore(just(Token::Comma).or_not()) + .or_not() + .map(|item| item.unwrap_or_else(Vec::new)); + + let array = items + .clone() + .delimited_by(just(Token::BracketO), just(Token::BracketC)) + .map(Expr::Array); + + let atom = literal + .or(ident_parser().map(|str| Expr::Name(str.to_owned()))) + .or(array) + .or(expr + .clone() + .delimited_by(just(Token::ParenO), just(Token::ParenC))); + + let call = atom + .then( + items + .delimited_by(just(Token::ParenO), just(Token::ParenC)) + .repeated(), + ) + .foldl(|callee, args| { + Expr::Call(Call { + callee: Box::new(callee), + args, + }) + }); + + let op = just(Token::Asterisk) + .to(BinOpKind::Mul) + .or(just(Token::Slash).to(BinOpKind::Div)); + + let product = call + .clone() + .then(op.then(call).repeated()) + .foldl(|a, (kind, b)| { + Expr::BinOp(BinOp { + kind, + lhs: Box::new(a), + rhs: Box::new(b), + span: 0..0, // lol todo + }) + }); + + // Sum ops (add and subtract) have equal precedence + let op = just(Token::Plus) + .to(BinOpKind::Add) + .or(just(Token::Minus).to(BinOpKind::Sub)); + let sum = product + .clone() + .then(op.then(product).repeated()) + .foldl(|a, (kind, b)| { + Expr::BinOp(BinOp { + kind, + lhs: Box::new(a), + rhs: Box::new(b), + span: 0..0, // lol todo + }) + }); + + // Comparison ops (equal, not-equal) have equal precedence + let op = just(Token::EqEq) + .to(BinOpKind::Eq) + .or(just(Token::BangEq).to(BinOpKind::Neq)); + let compare = sum + .clone() + .then(op.then(sum).repeated()) + .foldl(|a, (kind, b)| { + Expr::BinOp(BinOp { + kind, + lhs: Box::new(a), + rhs: Box::new(b), + span: 0..0, // lol todo + }) + }); + + compare + }) +} + +fn file_parser<'src>( + file_name: PathBuf, +) -> impl Parser, File, Error = Error<'src>> + Clone { + expr_parser() + .map(move |expr| File { + name: file_name.clone(), + items: vec![Stmt::Expr(expr)], + }) + .then_ignore(just(Token::Semi)) +} + +pub fn parse<'src, I>(lexer: I, len: usize, file_name: PathBuf) -> (Option, Vec>) +where + I: 'src, + I: Iterator, Span)>, +{ + file_parser(file_name).parse_recovery(Stream::from_iter(len..len + 1, lexer)) +} + +#[cfg(test)] +mod tests { + use std::{fmt::Debug, path::PathBuf}; + + use logos::Logos; + + use crate::lexer::Token; + + fn parse(src: &str) -> impl Debug + '_ { + let lexer = Token::lexer(src); + let len = lexer.source().len(); + + super::parse( + lexer.spanned(), + len, + PathBuf::from(module_path!().replace("::", "__")), + ) + } + + #[test] + fn addition() { + let r = parse("1 + 4;"); + insta::assert_debug_snapshot!(r); + } + + #[test] + fn expression() { + let r = parse("(4 / hallo()) + 5;"); + insta::assert_debug_snapshot!(r) + } +} diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap new file mode 100644 index 0000000..45cec1d --- /dev/null +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__addition.snap @@ -0,0 +1,34 @@ +--- +source: ub_parser/src/parser.rs +expression: r +--- +( + Some( + File { + name: "ub_parser__parser__tests", + items: [ + Expr( + BinOp( + BinOp { + kind: Add, + lhs: Literal( + Integer( + 1, + 0..1, + ), + ), + rhs: Literal( + Integer( + 4, + 4..5, + ), + ), + span: 0..0, + }, + ), + ), + ], + }, + ), + [], +) diff --git a/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap b/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap new file mode 100644 index 0000000..29c45e2 --- /dev/null +++ b/ub_parser/src/snapshots/ub_parser__parser__tests__expression.snap @@ -0,0 +1,48 @@ +--- +source: ub_parser/src/parser.rs +expression: r +--- +( + Some( + File { + name: "ub_parser__parser__tests", + items: [ + Expr( + BinOp( + BinOp { + kind: Add, + lhs: BinOp( + BinOp { + kind: Div, + lhs: Literal( + Integer( + 4, + 1..2, + ), + ), + rhs: Call( + Call { + callee: Name( + "hallo", + ), + args: [], + }, + ), + span: 0..0, + }, + ), + rhs: Literal( + Integer( + 5, + 16..17, + ), + ), + span: 0..0, + }, + ), + ), + ], + }, + ), + [], +) diff --git a/ub_parser/src/span.rs b/ub_parser/src/span.rs deleted file mode 100644 index 883bc42..0000000 --- a/ub_parser/src/span.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::ops::Range; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Span { - start: usize, - len: usize, -} - -impl Span { - pub fn start_end(start: usize, end: usize) -> Self { - Self { - start, - len: end - start, - } - } -} - -impl From> for Span { - fn from(r: Range) -> Self { - Self::start_end(r.start, r.end) - } -}