This commit is contained in:
nora 2022-03-21 14:59:18 +01:00
parent 888fcfd170
commit 9e87a4ce91
10 changed files with 355 additions and 4 deletions

170
Cargo.lock generated
View file

@ -11,6 +11,12 @@ dependencies = [
"const-random", "const-random",
] ]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "beef" name = "beef"
version = "0.5.1" version = "0.5.1"
@ -32,6 +38,19 @@ dependencies = [
"ahash", "ahash",
] ]
[[package]]
name = "console"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"terminal_size",
"winapi",
]
[[package]] [[package]]
name = "const-random" name = "const-random"
version = "0.1.13" version = "0.1.13"
@ -60,6 +79,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -77,6 +102,42 @@ dependencies = [
"wasi", "wasi",
] ]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "indexmap"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "insta"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30a7e1911532a662f6b08b68f884080850f2fd9544963c3ab23a5af42bda1eac"
dependencies = [
"console",
"once_cell",
"serde",
"serde_json",
"serde_yaml",
"similar",
]
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
@ -89,6 +150,12 @@ version = "0.2.121"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
[[package]]
name = "linked-hash-map"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.12.0" version = "0.12.0"
@ -113,6 +180,12 @@ dependencies = [
"utf8-ranges", "utf8-ranges",
] ]
[[package]]
name = "once_cell"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
[[package]] [[package]]
name = "proc-macro-hack" name = "proc-macro-hack"
version = "0.5.19" version = "0.5.19"
@ -143,6 +216,61 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "serde"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
dependencies = [
"indexmap",
"ryu",
"serde",
"yaml-rust",
]
[[package]]
name = "similar"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.89" version = "1.0.89"
@ -154,6 +282,16 @@ dependencies = [
"unicode-xid", "unicode-xid",
] ]
[[package]]
name = "terminal_size"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "tiny-keccak" name = "tiny-keccak"
version = "2.0.2" version = "2.0.2"
@ -175,6 +313,7 @@ name = "ub_parser"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"chumsky", "chumsky",
"insta",
"logos", "logos",
] ]
@ -195,3 +334,34 @@ name = "wasi"
version = "0.10.2+wasi-snapshot-preview1" version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]

View file

@ -8,3 +8,6 @@ edition = "2021"
[dependencies] [dependencies]
chumsky = "0.8.0" chumsky = "0.8.0"
logos = "0.12.0" logos = "0.12.0"
[dev-dependencies]
insta = "1.13.0"

View file

@ -137,6 +137,7 @@ pub enum BinOpKind {
Or, Or,
BitAnd, BitAnd,
BitOr, BitOr,
Xor,
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]

View file

@ -1,7 +1,10 @@
use logos::Logos; use logos::Logos;
#[derive(Logos, Debug, PartialEq)] #[derive(Logos, Debug, PartialEq)]
pub enum Token { pub enum Token<'a> {
#[regex("//[^\n]*", logos::skip)]
Comment,
// punctuation // punctuation
#[token("{")] #[token("{")]
BraceO, BraceO,
@ -17,6 +20,10 @@ pub enum Token {
ParenC, ParenC,
#[token(".")] #[token(".")]
Dot, Dot,
#[token(",")]
Comma,
#[token(";")]
Semi,
#[token("=")] #[token("=")]
Eq, Eq,
#[token("==")] #[token("==")]
@ -41,21 +48,88 @@ pub enum Token {
Plus, Plus,
#[token("-")] #[token("-")]
Minus, Minus,
#[token("|")]
Or,
#[token("&")]
And,
#[token("||")]
OrOr,
#[token("&&")]
AndAnd,
#[token("^")]
Caret,
// keywords // keywords
#[token("struct")] #[token("struct")]
Struct, Struct,
#[token("fn")] #[token("fn")]
Fn, Fn,
#[token("if")]
If,
#[token("else")]
Else,
#[token("while")]
While,
#[token("loop")]
Loop,
#[regex(r"[a-zA-Z_]\w*")] #[regex(r"[a-zA-Z_]\w*")]
Ident(String), Ident(&'a str),
#[regex(r##""[^"]*""##)]
String(&'a str),
#[regex(r"\d+")]
Integer(&'a str),
#[error] #[error]
#[regex(r"[ \t\n\r\f]+"), logos::skip] #[regex(r"[ \t\r\n]+", logos::skip)]
Error, Error,
} }
pub fn lex(code: &str) -> logos::Lexer<'_, Token> { pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
Token::lexer(code) Token::lexer(code)
} }
#[cfg(test)]
mod tests {
    use crate::lexer::Token;

    /// Runs the lexer over `source` until it is exhausted and gathers every
    /// token it yields, in order, into a vector.
    fn lex_test(source: &str) -> Vec<Token<'_>> {
        super::lex(source).collect()
    }

    // Each test below feeds one fixed input through the lexer and compares the
    // debug rendering of the resulting token list against a stored snapshot.

    /// Brackets, separators, comparison, arithmetic and logical operator tokens.
    #[test]
    fn punctuation() {
        let input = "{} [] () .,; = == != >= <= < > + - * / | || & && ^";
        let tokens = lex_test(input);
        insta::assert_debug_snapshot!(tokens);
    }

    /// Two dots separated by a mix of newline, carriage return, space and tab.
    #[test]
    fn whitespace() {
        let input = ".
\r\n \t .";
        let tokens = lex_test(input);
        insta::assert_debug_snapshot!(tokens);
    }

    /// Identifiers containing letters, underscores and digits.
    #[test]
    fn idents() {
        let input = "hello w_world b235_";
        let tokens = lex_test(input);
        insta::assert_debug_snapshot!(tokens);
    }

    /// Double-quoted string literals interleaved with integer literals.
    #[test]
    fn literals() {
        let input = r##""hello friend" 5 "morning" 3263475"##;
        let tokens = lex_test(input);
        insta::assert_debug_snapshot!(tokens);
    }

    /// Keywords, with a dot and a trailing semicolon mixed in.
    #[test]
    fn keywords() {
        let input = "struct fn . if else while loop;";
        let tokens = lex_test(input);
        insta::assert_debug_snapshot!(tokens);
    }
}

View file

@ -0,0 +1,15 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Ident(
"hello",
),
Ident(
"w_world",
),
Ident(
"b235_",
),
]

View file

@ -0,0 +1,14 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Struct,
Fn,
Dot,
If,
Else,
While,
Loop,
Semi,
]

View file

@ -0,0 +1,18 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
String(
"\"hello friend\"",
),
Integer(
"5",
),
String(
"\"morning\"",
),
Integer(
"3263475",
),
]

View file

@ -0,0 +1,31 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
BraceO,
BraceC,
BracketO,
BracketC,
ParenO,
ParenC,
Dot,
Comma,
Semi,
Eq,
EqEq,
BangEq,
GreaterEq,
LessEq,
Less,
Greater,
Plus,
Minus,
Asterisk,
Slash,
Or,
OrOr,
And,
AndAnd,
Caret,
]

View file

@ -0,0 +1,8 @@
---
source: ub_parser/src/lexer.rs
expression: tokens
---
[
Dot,
Dot,
]

View file

@ -1,5 +1,22 @@
use std::ops::Range;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span { pub struct Span {
start: usize, start: usize,
len: usize, len: usize,
} }
impl Span {
    /// Builds a span from a start offset and an end offset.
    ///
    /// The span is stored as its start offset plus a length, and the length
    /// is computed as `end - start`.
    ///
    /// # Panics
    ///
    /// Panics if `end` is smaller than `start`. The subtraction is checked
    /// explicitly so that inverted bounds fail with the same descriptive
    /// message in every build profile, rather than wrapping around to an
    /// enormous length when overflow checks are disabled.
    pub fn start_end(start: usize, end: usize) -> Self {
        let len = end
            .checked_sub(start)
            .expect("span end must not be before span start");
        Self { start, len }
    }
}
impl From<Range<usize>> for Span {
fn from(r: Range<usize>) -> Self {
Self::start_end(r.start, r.end)
}
}