This commit is contained in:
nora 2023-03-04 12:14:33 +01:00
parent 2fd78566a3
commit 6260ca0307
7 changed files with 430 additions and 71 deletions

View file

@ -1,36 +1,35 @@
use std::{ops::Range, path::PathBuf};
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// Opaque numeric identifier attached to expression nodes (see the `id` field of `Expr`).
pub struct NodeId(u32);
/// Half-open range of offsets locating a node in the source text.
// NOTE(review): presumably byte offsets as produced by the lexer — confirm.
type Span = Range<usize>;
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// Root of the syntax tree: one file and the items declared in it.
pub struct File {
/// Path identifying the file (presumably the file name handed to the parser — confirm).
pub name: PathBuf,
/// Top-level items of the file.
pub items: Vec<Item>,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// A type as written in the source, together with its location.
pub struct Ty {
/// Source range covered by the type.
pub span: Span,
/// Which kind of type this is.
pub kind: TyKind,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// The different forms a [`Ty`] can take.
pub enum TyKind {
/// The `u64` type.
U64,
/// A pointer to the boxed inner type.
Ptr(Box<Ty>),
/// A type referred to by name.
Name(String),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// An entry of [`File::items`].
pub enum Item {
/// A function declaration.
FnDecl(FnDecl),
/// A struct declaration.
StructDecl(StructDecl),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FnDecl {
pub name: String,
pub params: Vec<NameTyPair>,
@ -40,7 +39,7 @@ pub struct FnDecl {
pub body: Vec<Stmt>,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameTyPair {
pub name: String,
pub ty: Ty,
@ -48,7 +47,7 @@ pub struct NameTyPair {
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StructDecl {
pub name: String,
pub fields: Vec<NameTyPair>,
@ -56,7 +55,7 @@ pub struct StructDecl {
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Stmt {
VarDecl(VarDecl),
Assignment(Assignment),
@ -67,7 +66,7 @@ pub enum Stmt {
Expr(Expr),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VarDecl {
pub name: String,
pub ty: Option<Ty>,
@ -75,14 +74,14 @@ pub struct VarDecl {
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// An assignment statement.
pub struct Assignment {
/// Expression naming the place that is assigned to.
pub place: Expr,
/// Expression on the right-hand side of the assignment.
pub rhs: Expr,
/// Source range covered by the assignment.
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IfStmt {
pub cond: Expr,
pub body: Vec<Stmt>,
@ -90,33 +89,33 @@ pub struct IfStmt {
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// What follows the body of an `if` statement.
pub enum ElsePart {
/// A plain `else` block: its statements and a source range.
Else(Vec<Stmt>, Span),
/// An `else if`, represented as a nested (boxed) [`IfStmt`].
ElseIf(Box<IfStmt>),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// A `while` loop.
pub struct WhileStmt {
/// Loop condition expression.
pub cond: Expr,
/// Statements forming the loop body.
pub body: Vec<Stmt>,
/// Source range covered by the statement.
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// A loop statement that has a body but no condition expression.
pub struct LoopStmt {
/// Statements forming the loop body.
pub body: Vec<Stmt>,
/// Source range covered by the statement.
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// An expression node: its kind plus an identifier and a source location.
pub struct Expr {
/// Which kind of expression this is, including its operands.
pub kind: ExprKind,
/// Identifier of this node (the parser obtains it from `ParserState::next_id`).
pub id: NodeId,
/// Source range covered by the expression.
pub span: Span,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExprKind {
BinOp(BinOp),
UnaryOp(UnaryOp),
@ -127,7 +126,7 @@ pub enum ExprKind {
Array(Vec<Expr>),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BinOp {
pub kind: BinOpKind,
pub lhs: Box<Expr>,
@ -157,7 +156,7 @@ pub enum BinOpKind {
Xor,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnaryOp {
pub expr: Box<Expr>,
pub kind: UnaryOpKind,
@ -172,13 +171,13 @@ pub enum UnaryOpKind {
AddrOf,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// Access of a named field on an expression.
pub struct FieldAccess {
/// Expression whose field is accessed.
pub expr: Box<Expr>,
/// Name of the accessed field.
pub field_name: String,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Call {
pub callee: Box<Expr>,
pub args: Vec<Expr>,

View file

@ -3,7 +3,7 @@ use std::fmt::{Debug, Display, Formatter};
use logos::Logos;
#[derive(Logos, Debug, Clone, Hash, PartialEq, Eq)]
pub enum Token<'a> {
pub enum Token {
#[regex("//[^\n]*", logos::skip)]
Comment,
@ -83,21 +83,21 @@ pub enum Token<'a> {
#[token("let")]
Let,
#[regex(r"[a-zA-Z_]\w*")]
Ident(&'a str),
#[regex(r"[a-zA-Z_]\w*", |lex| lex.slice().to_string())]
Ident(String),
#[regex(r##""[^"]*""##)]
String(&'a str),
#[regex(r##""[^"]*""##, |lex| lex.slice().to_string())]
String(String),
#[regex(r"\d+")]
Integer(&'a str),
#[regex(r"\d+", |lex| lex.slice().parse())]
Integer(u64),
#[error]
#[regex(r"[ \t\r\n]+", logos::skip)]
Error,
}
impl<'a> Display for Token<'a> {
impl Display for Token {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Token::Comment => f.write_str("comment"),
@ -145,7 +145,7 @@ impl<'a> Display for Token<'a> {
}
}
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token> {
Token::lexer(code)
}
@ -153,7 +153,7 @@ pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
mod tests {
use crate::lexer::Token;
fn lex_test(str: &str) -> Vec<Token<'_>> {
/// Lexes `str` with [`super::lex`] and gathers every produced token into a vector.
fn lex_test(str: &str) -> Vec<Token> {
    super::lex(str).collect()
}

View file

@ -5,7 +5,6 @@ use std::path::PathBuf;
use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
use chumsky::prelude::Simple;
use logos::Logos;
use crate::lexer::Token;
@ -14,10 +13,34 @@ mod lexer;
mod parser;
mod pretty;
pub fn parse(_str: &str, _file_name: PathBuf) -> Result<ast::File, ()> {
todo!()
// Salsa input describing one source program: its text and the name of its file.
// Both fields are marked `#[return_ref]`.
#[salsa::input]
pub struct SourceProgram {
// Full source text; this is what `parser::parse` lexes.
#[return_ref]
pub text: String,
// File name; `parser::parse` clones it and passes it to the file parser.
#[return_ref]
pub file_name: PathBuf,
}
// Salsa jar of this crate, listing every salsa item it defines: the `SourceProgram`
// input, the `Diagnostics` accumulator and the tracked `parser::parse` function.
#[salsa::jar(db = Db)]
pub struct Jar(SourceProgram, Diagnostics, crate::parser::parse);
/// Database trait used by this crate's salsa functions (e.g. `&dyn crate::Db` in `parser::parse`).
pub trait Db: salsa::DbWithJar<Jar> {}
// Blanket impl: every type (sized or not) that implements `salsa::DbWithJar<Jar>` is a `Db`.
impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
// Salsa accumulator whose values are chumsky `Simple<Token>` errors.
// NOTE(review): nothing visible in this change pushes to it yet — confirm intended use.
#[salsa::accumulator]
pub struct Diagnostics(Simple<Token>);
// Concrete salsa database of this crate; it is registered with `crate::Jar` and can be
// created through `Default`.
#[derive(Default)]
#[salsa::db(crate::Jar)]
pub(crate) struct Database {
// Salsa storage, parameterised over the database type itself.
storage: salsa::Storage<Self>,
}
// Marks `Database` as a salsa database; no trait items are overridden.
impl salsa::Database for Database {}
// aaa
pub fn test() {
let src = "
fn main(uwu: u64, owo: ptr WOW) -> ptr u64 {
@ -34,11 +57,10 @@ fn main(uwu: u64, owo: ptr WOW) -> ptr u64 {
fn aa() {}
";
let lexer = Token::lexer(src);
let len = lexer.source().len();
let state = parser::ParserState::default();
let db = Database::default();
let source_program = SourceProgram::new(&db, src.to_string(), "uwu.ub".into());
let (file, errors) = parser::parse(lexer.spanned(), &state, len, "test_file".into());
let (file, errors) = parser::parse(&db, source_program);
if let Some(file) = file {
println!("{}", pretty::pretty_print_ast(&file));
@ -47,10 +69,10 @@ fn aa() {}
report_errors(src, errors);
}
fn report_errors(src: &str, errors: Vec<Simple<Token<'_>>>) {
fn report_errors(src: &str, errors: Vec<parser::Error>) {
errors
.into_iter()
.map(|e| e.map(|c| c.to_string()))
.map(|e| e.0.map(|c| c.to_string()))
.for_each(|e| {
let report = Report::build(ReportKind::Error, (), e.span().start);

View file

@ -1,6 +1,7 @@
use std::{cell::Cell, ops::Range, path::PathBuf};
use chumsky::{prelude::*, Stream};
use logos::Logos;
use crate::{
ast::{
@ -9,10 +10,34 @@ use crate::{
WhileStmt,
},
lexer::Token,
SourceProgram,
};
type Error<'src> = Simple<Token<'src>>;
type Span = Range<usize>;
#[derive(Debug, Clone, PartialEq)]
/// Parser error type: a newtype around chumsky's `Simple` error over [`Token`]s.
///
/// The wrapped error is public, so callers can reach it through `.0`.
pub struct Error(pub chumsky::error::Simple<Token>);
// `PartialEq` is derived on the struct; `Eq` is added by hand with an empty impl.
impl Eq for Error {}
/// Implements chumsky's error trait by forwarding every operation to the wrapped
/// `Simple` error and re-wrapping the result.
impl chumsky::Error<Token> for Error {
type Span = Span;
type Label = &'static str;
/// Builds an "expected one of `expected`, found `found`" error at `span`.
fn expected_input_found<Iter: IntoIterator<Item = Option<Token>>>(
span: Self::Span,
expected: Iter,
found: Option<Token>,
) -> Self {
// `<_>` lets the compiler infer the inner error type from the field of `Self`.
Self(<_>::expected_input_found(span, expected, found))
}
/// Attaches `label` to the wrapped error.
fn with_label(self, label: Self::Label) -> Self {
Self(self.0.with_label(label))
}
/// Merges the two wrapped errors into one.
fn merge(self, other: Self) -> Self {
Self(self.0.merge(other.0))
}
}
pub type Span = Range<usize>;
#[derive(Default)]
pub struct ParserState {
@ -27,19 +52,25 @@ impl ParserState {
}
}
fn ident_parser<'src>() -> impl Parser<Token<'src>, String, Error = Error<'src>> + Clone {
/// Parses a single identifier token and yields its name.
///
/// Any token other than `Token::Ident` is rejected. The parser is labelled
/// `"identifier"` for error messages and boxed before being returned.
fn ident_parser() -> impl Parser<Token, String, Error = Error> + Clone {
    // `Token::Ident` owns its `String` and `select!` matches the token by value,
    // so the name is moved out of the token instead of being cloned.
    let ident = select! {
        Token::Ident(name) => name,
    };
    ident.labelled("identifier").boxed()
}
fn ty_parser<'src>() -> impl Parser<Token<'src>, Ty, Error = Error<'src>> + Clone {
fn ty_parser() -> impl Parser<Token, Ty, Error = Error> + Clone {
recursive(|ty_parser| {
let primitive = filter_map(|span, token| {
let kind = match token {
Token::Ident("u64") => TyKind::U64,
_ => return Err(Simple::expected_input_found(span, Vec::new(), Some(token))),
Token::Ident(name) => TyKind::Name(name),
_ => {
return Err(Error(Simple::expected_input_found(
span,
Vec::new(),
Some(token),
)))
}
};
Ok(Ty { span, kind })
})
@ -66,7 +97,7 @@ fn ty_parser<'src>() -> impl Parser<Token<'src>, Ty, Error = Error<'src>> + Clon
fn expr_parser<'src>(
state: &'src ParserState,
) -> impl Parser<Token<'src>, Expr, Error = Error<'src>> + Clone + 'src {
) -> impl Parser<Token, Expr, Error = Error> + Clone + 'src {
recursive(|expr| {
let literal = filter_map(|span: Span, token| match token {
Token::String(str) => Ok(Expr {
@ -79,11 +110,15 @@ fn expr_parser<'src>(
}),
// todo lol unwrap
Token::Integer(int) => Ok(Expr {
kind: ExprKind::Literal(Literal::Integer(int.parse().unwrap(), span.clone())),
kind: ExprKind::Literal(Literal::Integer(int, span.clone())),
id: state.next_id(),
span,
}),
_ => Err(Simple::expected_input_found(span, Vec::new(), Some(token))),
_ => Err(Error(Simple::expected_input_found(
span,
Vec::new(),
Some(token),
))),
})
.labelled("literal");
@ -231,7 +266,7 @@ fn expr_parser<'src>(
fn statement_parser<'src>(
state: &'src ParserState,
) -> impl Parser<Token<'src>, Stmt, Error = Error<'src>> + Clone {
) -> impl Parser<Token, Stmt, Error = Error> + Clone + 'src {
recursive(|stmt| {
let var_decl = just(Token::Let)
.ignore_then(ident_parser())
@ -308,7 +343,7 @@ fn statement_parser<'src>(
fn name_ty_pair_parser<'src>(
state: &'src ParserState,
) -> impl Parser<Token<'src>, NameTyPair, Error = Error<'src>> + Clone {
) -> impl Parser<Token, NameTyPair, Error = Error> + Clone + 'src {
ident_parser()
.then_ignore(just(Token::Colon))
.then(ty_parser())
@ -322,7 +357,7 @@ fn name_ty_pair_parser<'src>(
fn struct_parser<'src>(
state: &'src ParserState,
) -> impl Parser<Token<'src>, StructDecl, Error = Error<'src>> + Clone {
) -> impl Parser<Token, StructDecl, Error = Error> + Clone + 'src {
let name = just(Token::Struct).ignore_then(ident_parser());
let fields = name_ty_pair_parser(state)
@ -341,7 +376,7 @@ fn struct_parser<'src>(
fn item_parser<'src>(
state: &'src ParserState,
) -> impl Parser<Token<'src>, Item, Error = Error<'src>> + Clone {
) -> impl Parser<Token, Item, Error = Error> + Clone + 'src {
// ---- function
let name = ident_parser();
@ -383,7 +418,7 @@ fn item_parser<'src>(
fn file_parser<'src>(
file_name: PathBuf,
state: &'src ParserState,
) -> impl Parser<Token<'src>, File, Error = Error<'src>> + Clone {
) -> impl Parser<Token, File, Error = Error> + Clone + 'src {
item_parser(state)
.repeated()
.then_ignore(end())
@ -394,20 +429,18 @@ fn file_parser<'src>(
.labelled("file")
}
pub fn parse<'src, I>(
lexer: I,
state: &'src ParserState,
len: usize,
file_name: PathBuf,
) -> (Option<File>, Vec<Error<'src>>)
where
I: 'src,
I: Iterator<Item = (Token<'src>, Span)>,
{
file_parser(file_name, state).parse_recovery_verbose(Stream::from_iter(len..len + 1, lexer))
// Salsa-tracked entry point of the parser: lexes the text of `source` and runs the
// file parser over the resulting token stream.
//
// Returns the `File` recovered by the parser (if any) together with all parse errors.
#[salsa::tracked]
pub fn parse(db: &dyn crate::Db, source: SourceProgram) -> (Option<File>, Vec<Error>) {
    let text = source.text(db);
    let state = ParserState::default();

    // End-of-input is reported with the one-element span just past the end of the text.
    let end = text.len();
    let tokens = Stream::from_iter(end..end + 1, Token::lexer(text).spanned());

    // The parser borrows `state`; keeping it in a named local (declared after `state`)
    // guarantees it is dropped before `state` is.
    let parser = file_parser(source.file_name(db).clone(), &state);
    parser.parse_recovery_verbose(tokens)
}
#[cfg(test)]
#[cfg(disabled)]
mod tests {
use std::{fmt::Debug, path::PathBuf};
@ -492,7 +525,10 @@ mod tests {
fn var_decl() {
let state = ParserState::default();
let r = parse("fn foo() -> u64 { let hello: u64 = 5; let owo = 0; let nice: u64; let nothing; }", &state);
let r = parse(
"fn foo() -> u64 { let hello: u64 = 5; let owo = 0; let nice: u64; let nothing; }",
&state,
);
insta::assert_debug_snapshot!(r);
}

View file

@ -71,7 +71,6 @@ impl Printer {
fn print_ty(&mut self, ty: &Ty) {
match &ty.kind {
TyKind::U64 => self.word("u64"),
TyKind::Name(name) => self.word(name),
TyKind::Ptr(ty) => {
self.word("ptr ");