This commit is contained in:
nora 2022-03-21 14:59:18 +01:00
parent 888fcfd170
commit 9e87a4ce91
10 changed files with 355 additions and 4 deletions

View file

@ -8,3 +8,6 @@ edition = "2021"
[dependencies]
chumsky = "0.8.0"
logos = "0.12.0"
[dev-dependencies]
insta = "1.13.0"

View file

@ -137,6 +137,7 @@ pub enum BinOpKind {
Or,
BitAnd,
BitOr,
Xor,
}
#[derive(Debug, Clone, PartialEq)]

View file

@ -1,7 +1,10 @@
use logos::Logos;
#[derive(Logos, Debug, PartialEq)]
pub enum Token {
pub enum Token<'a> {
#[regex("//[^\n]*", logos::skip)]
Comment,
// punctuation
#[token("{")]
BraceO,
@ -17,6 +20,10 @@ pub enum Token {
ParenC,
#[token(".")]
Dot,
#[token(",")]
Comma,
#[token(";")]
Semi,
#[token("=")]
Eq,
#[token("==")]
@ -41,21 +48,88 @@ pub enum Token {
Plus,
#[token("-")]
Minus,
#[token("|")]
Or,
#[token("&")]
And,
#[token("||")]
OrOr,
#[token("&&")]
AndAnd,
#[token("^")]
Caret,
// keywords
#[token("struct")]
Struct,
#[token("fn")]
Fn,
#[token("if")]
If,
#[token("else")]
Else,
#[token("while")]
While,
#[token("loop")]
Loop,
#[regex(r"[a-zA-Z_]\w*")]
Ident(String),
Ident(&'a str),
#[regex(r##""[^"]*""##)]
String(&'a str),
#[regex(r"\d+")]
Integer(&'a str),
#[error]
#[regex(r"[ \t\n\r\f]+"), logos::skip]
#[regex(r"[ \t\r\n]+", logos::skip)]
Error,
}
pub fn lex(code: &str) -> logos::Lexer<'_, Token> {
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
Token::lexer(code)
}
#[cfg(test)]
mod tests {
    use super::Token;

    /// Runs the lexer over `input` and gathers every item it yields into a vector.
    fn collect_tokens(input: &str) -> Vec<Token<'_>> {
        super::lex(input).collect()
    }

    /// Snapshot of the punctuation and operator tokens.
    #[test]
    fn punctuation() {
        let tokens = collect_tokens("{} [] () .,; = == != >= <= < > + - * / | || & && ^");
        insta::assert_debug_snapshot!(tokens);
    }

    /// Snapshot of two dots separated only by whitespace characters.
    #[test]
    fn whitespace() {
        // The literal spans two lines on purpose: the raw line break is part of the input.
        let tokens = collect_tokens(
            ".
\r\n \t .",
        );
        insta::assert_debug_snapshot!(tokens);
    }

    /// Snapshot of identifiers containing letters, underscores and digits.
    #[test]
    fn idents() {
        let tokens = collect_tokens("hello w_world b235_");
        insta::assert_debug_snapshot!(tokens);
    }

    /// Snapshot of string and integer literals.
    #[test]
    fn literals() {
        let tokens = collect_tokens(r##""hello friend" 5 "morning" 3263475"##);
        insta::assert_debug_snapshot!(tokens);
    }

    /// Snapshot of the keywords, interleaved with a dot and a trailing semicolon.
    #[test]
    fn keywords() {
        let tokens = collect_tokens("struct fn . if else while loop;");
        insta::assert_debug_snapshot!(tokens);
    }
}

View file

@ -0,0 +1,15 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Ident(
"hello",
),
Ident(
"w_world",
),
Ident(
"b235_",
),
]

View file

@ -0,0 +1,14 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Struct,
Fn,
Dot,
If,
Else,
While,
Loop,
Semi,
]

View file

@ -0,0 +1,18 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
String(
"\"hello friend\"",
),
Integer(
"5",
),
String(
"\"morning\"",
),
Integer(
"3263475",
),
]

View file

@ -0,0 +1,31 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
BraceO,
BraceC,
BracketO,
BracketC,
ParenO,
ParenC,
Dot,
Comma,
Semi,
Eq,
EqEq,
BangEq,
GreaterEq,
LessEq,
Less,
Greater,
Plus,
Minus,
Asterisk,
Slash,
Or,
OrOr,
And,
AndAnd,
Caret,
]

View file

@ -0,0 +1,8 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Dot,
Dot,
]

View file

@ -1,5 +1,22 @@
use std::ops::Range;
/// A contiguous region, stored as a starting offset plus a length.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset at which the span begins.
    start: usize,
    /// Number of units covered, counted from `start`.
    len: usize,
}

impl Span {
    /// Builds a span from a `start` offset and an exclusive `end` offset.
    ///
    /// The stored length is `end - start`. An inverted pair (`end < start`)
    /// produces an empty span located at `start`, mirroring how
    /// `std::ops::Range` treats `start >= end` as empty. A plain subtraction
    /// would panic in debug builds and wrap around to a huge length in
    /// release builds for such input.
    pub fn start_end(start: usize, end: usize) -> Self {
        Self {
            start,
            // Saturate so that inverted bounds behave the same in every build profile.
            len: end.saturating_sub(start),
        }
    }
}
impl From<Range<usize>> for Span {
fn from(r: Range<usize>) -> Self {
Self::start_end(r.start, r.end)
}
}