rename parser crate

Author: nora (2022-03-25 20:38:55 +01:00)
Parent: 4aabcdfd76
Commit: 632f9d3426
16 changed files with 31 additions and 27 deletions

parser/Cargo.toml (new file, 13 lines)

@@ -0,0 +1,13 @@
[package]
name = "parser"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chumsky = "0.8.0"
logos = "0.12.0"

[dev-dependencies]
insta = "1.13.0"

parser/src/ast.rs (new file, 163 lines)

@@ -0,0 +1,163 @@
use std::{ops::Range, path::PathBuf};
type Span = Range<usize>;
#[derive(Debug, Clone, PartialEq)]
pub struct File {
pub name: PathBuf,
pub items: Vec<Item>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct Ty {
pub span: Span,
pub kind: TyKind,
}
#[derive(Debug, Clone, PartialEq)]
pub enum TyKind {
U64,
Ptr(Box<TyKind>),
Name(String),
}
#[derive(Debug, Clone, PartialEq)]
pub enum Item {
FnDecl(FnDecl),
StructDecl(StructDecl),
}
#[derive(Debug, Clone, PartialEq)]
pub struct FnDecl {
pub name: String,
pub params: Vec<NameTyPair>,
pub ret_ty: Option<Ty>,
pub span: Span,
pub body: Vec<Stmt>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct NameTyPair {
pub name: String,
pub ty: Ty,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub struct StructDecl {
pub name: String,
pub fields: Vec<NameTyPair>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Stmt {
VarDecl(VarDecl),
Assignment(Assignment),
IfStmt(IfStmt),
WhileStmt(WhileStmt),
LoopStmt(LoopStmt),
Item(Item),
Expr(Expr),
}
#[derive(Debug, Clone, PartialEq)]
pub struct VarDecl {
pub name: String,
pub ty: Ty,
pub rhs: Option<Expr>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub struct Assignment {
pub place: Expr,
pub rhs: Expr,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub struct IfStmt {
pub cond: Expr,
pub body: Vec<Stmt>,
pub else_part: Option<ElsePart>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub enum ElsePart {
Else(Vec<Stmt>, Span),
ElseIf(Box<IfStmt>),
}
#[derive(Debug, Clone, PartialEq)]
pub struct WhileStmt {
pub cond: Expr,
pub body: Vec<Stmt>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub struct LoopStmt {
pub body: Vec<Stmt>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
BinOp(BinOp),
FieldAccess(FieldAccess),
Call(Call),
Deref(Box<Expr>),
Literal(Literal),
Name(String),
Array(Vec<Expr>),
}
#[derive(Debug, Clone, PartialEq)]
pub struct BinOp {
pub kind: BinOpKind,
pub lhs: Box<Expr>,
pub rhs: Box<Expr>,
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
pub enum BinOpKind {
Eq,
Neq,
Gt,
Lt,
GtEq,
LtEq,
Add,
Sub,
Mul,
Div,
Mod,
Shr,
Shl,
And,
Or,
BitAnd,
BitOr,
Xor,
}
#[derive(Debug, Clone, PartialEq)]
pub struct FieldAccess {
pub expr: Box<Expr>,
pub field_name: String,
}
#[derive(Debug, Clone, PartialEq)]
pub struct Call {
pub callee: Box<Expr>,
pub args: Vec<Expr>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
String(String, Span),
Integer(u64, Span),
}
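For orientation, a statement like "u64 x = 5;" (the shape accepted by the parser's var_decl rule further down) would be built from these types roughly as in the following sketch; the spans are illustrative byte offsets into that snippet.

// Sketch only: "u64 x = 5;" expressed with the AST types above.
let stmt = Stmt::VarDecl(VarDecl {
    name: "x".to_owned(),
    ty: Ty { span: 0..3, kind: TyKind::U64 },
    rhs: Some(Expr::Literal(Literal::Integer(5, 8..9))),
    span: 0..10,
});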

parser/src/lexer.rs (new file, 139 lines)

@@ -0,0 +1,139 @@
use logos::Logos;
#[derive(Logos, Debug, Clone, Hash, PartialEq, Eq)]
pub enum Token<'a> {
#[regex("//[^\n]*", logos::skip)]
Comment,
// punctuation
#[token("{")]
BraceO,
#[token("}")]
BraceC,
#[token("[")]
BracketO,
#[token("]")]
BracketC,
#[token("(")]
ParenO,
#[token(")")]
ParenC,
#[token(".")]
Dot,
#[token(",")]
Comma,
#[token(";")]
Semi,
#[token("=")]
Eq,
#[token("==")]
EqEq,
#[token("!")]
Bang,
#[token("!=")]
BangEq,
#[token(">")]
Greater,
#[token("<")]
Less,
#[token(">=")]
GreaterEq,
#[token("<=")]
LessEq,
#[token("*")]
Asterisk,
#[token("/")]
Slash,
#[token("+")]
Plus,
#[token("-")]
Minus,
#[token("|")]
Or,
#[token("&")]
And,
#[token("||")]
OrOr,
#[token("&&")]
AndAnd,
#[token("^")]
Caret,
#[token("->")]
Arrow,
#[token(":")]
Colon,
// keywords
#[token("struct")]
Struct,
#[token("fn")]
Fn,
#[token("if")]
If,
#[token("else")]
Else,
#[token("while")]
While,
#[token("loop")]
Loop,
#[regex(r"[a-zA-Z_]\w*")]
Ident(&'a str),
#[regex(r##""[^"]*""##)]
String(&'a str),
#[regex(r"\d+")]
Integer(&'a str),
#[error]
#[regex(r"[ \t\r\n]+", logos::skip)]
Error,
}
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
Token::lexer(code)
}
#[cfg(test)]
mod tests {
use crate::lexer::Token;
fn lex_test(str: &str) -> Vec<Token<'_>> {
let lexer = super::lex(str);
lexer.collect()
}
#[test]
fn punctuation() {
let tokens = lex_test("{} [] () .,; = == != >= <= < > + - * / | || & && ^ -> :");
insta::assert_debug_snapshot!(tokens);
}
#[test]
fn whitespace() {
let tokens = lex_test(
".
\r\n \t .",
);
insta::assert_debug_snapshot!(tokens);
}
#[test]
fn idents() {
let tokens = lex_test("hello w_world b235_");
insta::assert_debug_snapshot!(tokens);
}
#[test]
fn literals() {
let tokens = lex_test(r##""hello friend" 5 "morning" 3263475"##);
insta::assert_debug_snapshot!(tokens);
}
#[test]
fn keywords() {
let tokens = lex_test("struct fn . if else while loop;");
insta::assert_debug_snapshot!(tokens);
}
}
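The parser further down consumes (Token, Span) pairs rather than bare tokens; both lex above and Token::lexer expose those through spanned(). A minimal sketch of what that iteration yields:

// Sketch: spanned lexing, in the form parser::parse below consumes.
let mut tokens = Token::lexer("x = 5;").spanned();
assert_eq!(tokens.next(), Some((Token::Ident("x"), 0..1)));
assert_eq!(tokens.next(), Some((Token::Eq, 2..3)));
assert_eq!(tokens.next(), Some((Token::Integer("5"), 4..5)));
assert_eq!(tokens.next(), Some((Token::Semi, 5..6)));
assert_eq!(tokens.next(), None);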

parser/src/lib.rs (new file, 12 lines)

@@ -0,0 +1,12 @@
#![warn(rust_2018_idioms)]
#![allow(dead_code)]

use std::path::PathBuf;

mod ast;
mod lexer;
mod parser;

pub fn parse(_str: &str, _file_name: PathBuf) -> Result<ast::File, ()> {
    todo!()
}
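The public parse function is still a todo!() in this commit. One plausible wiring, mirroring the test helper in parser.rs below, is sketched here; collapsing the chumsky diagnostics into () is an assumption, since the crate's real error type is not decided yet.

// Hypothetical implementation of the todo!() above (a sketch, not committed code).
pub fn parse(str: &str, file_name: PathBuf) -> Result<ast::File, ()> {
    let lexer = lexer::lex(str);
    let len = lexer.source().len();
    let (file, errors) = parser::parse(lexer.spanned(), len, file_name);
    match file {
        Some(file) if errors.is_empty() => Ok(file),
        _ => Err(()), // diagnostics are dropped here for now
    }
}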

parser/src/parser.rs (new file, 304 lines)

@@ -0,0 +1,304 @@
use std::{ops::Range, path::PathBuf};
use chumsky::{prelude::*, Stream};
use crate::{
ast::{
Assignment, BinOp, BinOpKind, Call, Expr, File, FnDecl, Item, Literal, NameTyPair, Stmt,
StructDecl, Ty, TyKind, VarDecl,
},
lexer::Token,
};
type Error<'src> = Simple<Token<'src>>;
type Span = Range<usize>;
fn ident_parser<'src>() -> impl Parser<Token<'src>, String, Error = Error<'src>> + Clone {
filter_map(|span, tok| match tok {
Token::Ident(ident) => Ok(ident.to_owned()),
_ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))),
})
.labelled("identifier")
.boxed()
}
fn ty_parser<'src>() -> impl Parser<Token<'src>, Ty, Error = Error<'src>> + Clone {
filter_map(|span, token| {
let kind = match token {
Token::Ident("u64") => TyKind::U64,
_ => return Err(Simple::expected_input_found(span, Vec::new(), Some(token))),
};
Ok(Ty { span, kind })
})
.boxed()
}
fn expr_parser<'src>() -> impl Parser<Token<'src>, Expr, Error = Error<'src>> + Clone {
recursive(|expr| {
let literal = filter_map(|span, token| match token {
Token::String(str) => Ok(Expr::Literal(Literal::String(
str[1..str.len() - 1].to_owned(), // strip only the surrounding quotes
span,
))),
// TODO: report an error instead of unwrapping (fails on integers that overflow u64)
Token::Integer(int) => Ok(Expr::Literal(Literal::Integer(int.parse().unwrap(), span))),
_ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))),
})
.labelled("literal");
// A list of expressions
let items = expr
.clone()
.chain(just(Token::Comma).ignore_then(expr.clone()).repeated())
.then_ignore(just(Token::Comma).or_not())
.or_not()
.map(|item| item.unwrap_or_else(Vec::new));
let array = items
.clone()
.delimited_by(just(Token::BracketO), just(Token::BracketC))
.map(Expr::Array);
let atom = literal
.or(ident_parser().map(Expr::Name))
.or(array)
.or(expr
.clone()
.delimited_by(just(Token::ParenO), just(Token::ParenC)))
.boxed();
let call = atom
.then(
items
.delimited_by(just(Token::ParenO), just(Token::ParenC))
.repeated(),
)
.foldl(|callee, args| {
Expr::Call(Call {
callee: Box::new(callee),
args,
})
});
let op = just(Token::Asterisk)
.to(BinOpKind::Mul)
.or(just(Token::Slash).to(BinOpKind::Div));
let product = call
.clone()
.then(op.then(call).repeated())
.foldl(|a, (kind, b)| {
Expr::BinOp(BinOp {
kind,
lhs: Box::new(a),
rhs: Box::new(b),
span: 0..0, // TODO: compute the real span
})
});
// Sum ops (add and subtract) have equal precedence
let op = just(Token::Plus)
.to(BinOpKind::Add)
.or(just(Token::Minus).to(BinOpKind::Sub));
let sum = product
.clone()
.then(op.then(product).repeated())
.foldl(|a, (kind, b)| {
Expr::BinOp(BinOp {
kind,
lhs: Box::new(a),
rhs: Box::new(b),
span: 0..0, // TODO: compute the real span
})
});
// Comparison ops (equal, not-equal) have equal precedence
let op = just(Token::EqEq)
.to(BinOpKind::Eq)
.or(just(Token::BangEq).to(BinOpKind::Neq));
let compare = sum
.clone()
.then(op.then(sum).repeated())
.foldl(|a, (kind, b)| {
Expr::BinOp(BinOp {
kind,
lhs: Box::new(a),
rhs: Box::new(b),
span: 0..0, // TODO: compute the real span
})
});
compare.boxed()
})
}
fn statement_parser<'src>(
item: impl Parser<Token<'src>, Item, Error = Error<'src>> + Clone,
) -> impl Parser<Token<'src>, Stmt, Error = Error<'src>> + Clone {
let var_decl = ty_parser()
.then(ident_parser())
.then_ignore(just(Token::Eq))
.then(expr_parser())
.map(|((ty, name), rhs)| {
Stmt::VarDecl(VarDecl {
name,
ty,
rhs: Some(rhs),
span: Default::default(),
})
});
let assignment = expr_parser()
.then_ignore(just(Token::Eq))
.then(expr_parser())
.map(|(place, rhs)| {
Stmt::Assignment(Assignment {
place,
rhs,
span: Default::default(),
})
});
var_decl
.or(assignment)
.or(expr_parser().map(Stmt::Expr))
.or(item.clone().map(Stmt::Item))
.then_ignore(just(Token::Semi))
}
fn name_ty_pair_parser<'src>() -> impl Parser<Token<'src>, NameTyPair, Error = Error<'src>> + Clone
{
ident_parser()
.then_ignore(just(Token::Colon))
.then(ty_parser())
.map_with_span(|(name, ty), span| NameTyPair { name, ty, span })
}
fn function_parser<'src>(
item: impl Parser<Token<'src>, Item, Error = Error<'src>> + Clone,
) -> impl Parser<Token<'src>, FnDecl, Error = Error<'src>> + Clone {
let name = ident_parser();
let params = name_ty_pair_parser()
.separated_by(just(Token::Comma))
.allow_trailing()
.delimited_by(just(Token::ParenO), just(Token::ParenC))
.labelled("function arguments");
let ret_ty = just(Token::Arrow).ignore_then(ty_parser()).or_not();
just(Token::Fn)
.map_with_span(|_, span| span)
.then(name)
.then(params)
.then(ret_ty)
.then(
statement_parser(item.clone())
.repeated()
.delimited_by(just(Token::BraceO), just(Token::BraceC)),
)
.map(|((((fn_span, name), params), ret_ty), body)| FnDecl {
name,
params,
ret_ty,
span: fn_span,
body,
})
.labelled("function")
}
fn struct_parser<'src>() -> impl Parser<Token<'src>, StructDecl, Error = Error<'src>> + Clone {
let name = just(Token::Struct).ignore_then(ident_parser());
let fields = name_ty_pair_parser()
.separated_by(just(Token::Comma))
.delimited_by(just(Token::BraceO), just(Token::BraceC));
name.then(fields).map(|(name, fields)| StructDecl {
name,
fields,
span: Default::default(),
})
}
fn item_parser<'src>() -> impl Parser<Token<'src>, Item, Error = Error<'src>> + Clone {
recursive(|item| {
function_parser(item)
.map(Item::FnDecl)
.or(struct_parser().map(Item::StructDecl))
})
}
fn file_parser<'src>(
file_name: PathBuf,
) -> impl Parser<Token<'src>, File, Error = Error<'src>> + Clone {
item_parser().repeated().map(move |items| File {
name: file_name.clone(),
items,
})
}
pub fn parse<'src, I>(lexer: I, len: usize, file_name: PathBuf) -> (Option<File>, Vec<Error<'src>>)
where
I: 'src,
I: Iterator<Item = (Token<'src>, Span)>,
{
file_parser(file_name).parse_recovery_verbose(Stream::from_iter(len..len + 1, lexer))
}
#[cfg(test)]
mod tests {
use std::{fmt::Debug, path::PathBuf};
use logos::Logos;
use crate::lexer::Token;
fn parse(src: &str) -> impl Debug + '_ {
let lexer = Token::lexer(src);
let len = lexer.source().len();
super::parse(
lexer.spanned(),
len,
PathBuf::from(module_path!().replace("::", "__")),
)
}
#[test]
fn addition() {
let r = parse("fn main() { 1 + 4; }");
insta::assert_debug_snapshot!(r);
}
#[test]
fn expression() {
let r = parse("fn main() { (4 / hallo()) + 5; }");
insta::assert_debug_snapshot!(r)
}
#[test]
fn function() {
let r = parse("fn foo() -> u64 { 1 + 5; }");
insta::assert_debug_snapshot!(r)
}
#[test]
fn nested_function() {
let r = parse("fn foo() { fn foo2() {} fn foo3() {} }");
insta::assert_debug_snapshot!(r)
}
#[test]
fn nested_function2() {
let r = parse("fn foo() { fn foo2() {} 1 + 5; }");
insta::assert_debug_snapshot!(r)
}
#[test]
fn struct_() {
let r = parse("struct X { y: u64, x: u64 }");
insta::assert_debug_snapshot!(r)
}
}
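For reference, expr_parser layers its foldl passes so that calls bind tightest, then * and /, then + and -, then == and !=, all left-associative. An input like 1 + 2 * 3 should therefore come out shaped like the sketch below, where the literal spans are offsets into that string and the 0..0 spans are the placeholders the parser currently emits for BinOp nodes.

// Expected shape for "1 + 2 * 3" (illustrative, not a committed snapshot).
let expected = Expr::BinOp(BinOp {
    kind: BinOpKind::Add,
    lhs: Box::new(Expr::Literal(Literal::Integer(1, 0..1))),
    rhs: Box::new(Expr::BinOp(BinOp {
        kind: BinOpKind::Mul,
        lhs: Box::new(Expr::Literal(Literal::Integer(2, 4..5))),
        rhs: Box::new(Expr::Literal(Literal::Integer(3, 8..9))),
        span: 0..0,
    })),
    span: 0..0,
});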

@@ -0,0 +1,16 @@
---
source: parser/src/lexer.rs
assertion_line: 125
expression: tokens
---
[
Ident(
"hello",
),
Ident(
"w_world",
),
Ident(
"b235_",
),
]

@@ -0,0 +1,15 @@
---
source: parser/src/lexer.rs
assertion_line: 137
expression: tokens
---
[
Struct,
Fn,
Dot,
If,
Else,
While,
Loop,
Semi,
]

@@ -0,0 +1,19 @@
---
source: parser/src/lexer.rs
assertion_line: 131
expression: tokens
---
[
String(
"\"hello friend\"",
),
Integer(
"5",
),
String(
"\"morning\"",
),
Integer(
"3263475",
),
]

@@ -0,0 +1,34 @@
---
source: parser/src/lexer.rs
assertion_line: 110
expression: tokens
---
[
BraceO,
BraceC,
BracketO,
BracketC,
ParenO,
ParenC,
Dot,
Comma,
Semi,
Eq,
EqEq,
BangEq,
GreaterEq,
LessEq,
Less,
Greater,
Plus,
Minus,
Asterisk,
Slash,
Or,
OrOr,
And,
AndAnd,
Caret,
Arrow,
Colon,
]

@@ -0,0 +1,9 @@
---
source: parser/src/lexer.rs
assertion_line: 119
expression: tokens
---
[
Dot,
Dot,
]

@@ -0,0 +1,45 @@
---
source: parser/src/parser.rs
assertion_line: 272
expression: r
---
(
Some(
File {
name: "parser__parser__tests",
items: [
FnDecl(
FnDecl {
name: "main",
params: [],
ret_ty: None,
span: 0..2,
body: [
Expr(
BinOp(
BinOp {
kind: Add,
lhs: Literal(
Integer(
1,
12..13,
),
),
rhs: Literal(
Integer(
4,
16..17,
),
),
span: 0..0,
},
),
),
],
},
),
],
},
),
[],
)

@@ -0,0 +1,59 @@
---
source: parser/src/parser.rs
assertion_line: 278
expression: r
---
(
Some(
File {
name: "parser__parser__tests",
items: [
FnDecl(
FnDecl {
name: "main",
params: [],
ret_ty: None,
span: 0..2,
body: [
Expr(
BinOp(
BinOp {
kind: Add,
lhs: BinOp(
BinOp {
kind: Div,
lhs: Literal(
Integer(
4,
13..14,
),
),
rhs: Call(
Call {
callee: Name(
"hallo",
),
args: [],
},
),
span: 0..0,
},
),
rhs: Literal(
Integer(
5,
28..29,
),
),
span: 0..0,
},
),
),
],
},
),
],
},
),
[],
)

@@ -0,0 +1,50 @@
---
source: parser/src/parser.rs
assertion_line: 284
expression: r
---
(
Some(
File {
name: "parser__parser__tests",
items: [
FnDecl(
FnDecl {
name: "foo",
params: [],
ret_ty: Some(
Ty {
span: 12..15,
kind: U64,
},
),
span: 0..2,
body: [
Expr(
BinOp(
BinOp {
kind: Add,
lhs: Literal(
Integer(
1,
18..19,
),
),
rhs: Literal(
Integer(
5,
22..23,
),
),
span: 0..0,
},
),
),
],
},
),
],
},
),
[],
)

@@ -0,0 +1,39 @@
---
source: parser/src/parser.rs
assertion_line: 302
expression: r
---
(
Some(
File {
name: "parser__parser__tests",
items: [
StructDecl(
StructDecl {
name: "X",
fields: [
NameTyPair {
name: "y",
ty: Ty {
span: 14..17,
kind: U64,
},
span: 11..17,
},
NameTyPair {
name: "x",
ty: Ty {
span: 22..25,
kind: U64,
},
span: 19..25,
},
],
span: 0..0,
},
),
],
},
),
[],
)