mirror of
https://github.com/Noratrieb/ub.git
synced 2026-01-14 16:45:05 +01:00
lexer!
This commit is contained in:
parent
888fcfd170
commit
9e87a4ce91
10 changed files with 355 additions and 4 deletions
170
Cargo.lock
generated
170
Cargo.lock
generated
|
|
@ -11,6 +11,12 @@ dependencies = [
|
||||||
"const-random",
|
"const-random",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "beef"
|
name = "beef"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
|
|
@ -32,6 +38,19 @@ dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"terminal_size",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "const-random"
|
name = "const-random"
|
||||||
version = "0.1.13"
|
version = "0.1.13"
|
||||||
|
|
@ -60,6 +79,12 @@ version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encode_unicode"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
|
|
@ -77,6 +102,42 @@ dependencies = [
|
||||||
"wasi",
|
"wasi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.11.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indexmap"
|
||||||
|
version = "1.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"hashbrown",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "insta"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "30a7e1911532a662f6b08b68f884080850f2fd9544963c3ab23a5af42bda1eac"
|
||||||
|
dependencies = [
|
||||||
|
"console",
|
||||||
|
"once_cell",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"serde_yaml",
|
||||||
|
"similar",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
|
@ -89,6 +150,12 @@ version = "0.2.121"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linked-hash-map"
|
||||||
|
version = "0.5.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logos"
|
name = "logos"
|
||||||
version = "0.12.0"
|
version = "0.12.0"
|
||||||
|
|
@ -113,6 +180,12 @@ dependencies = [
|
||||||
"utf8-ranges",
|
"utf8-ranges",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro-hack"
|
name = "proc-macro-hack"
|
||||||
version = "0.5.19"
|
version = "0.5.19"
|
||||||
|
|
@ -143,6 +216,61 @@ version = "0.6.25"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ryu"
|
||||||
|
version = "1.0.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.136"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.136"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.79"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_yaml"
|
||||||
|
version = "0.8.23"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
|
||||||
|
dependencies = [
|
||||||
|
"indexmap",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
"yaml-rust",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "similar"
|
||||||
|
version = "2.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.89"
|
version = "1.0.89"
|
||||||
|
|
@ -154,6 +282,16 @@ dependencies = [
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "terminal_size"
|
||||||
|
version = "0.1.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tiny-keccak"
|
name = "tiny-keccak"
|
||||||
version = "2.0.2"
|
version = "2.0.2"
|
||||||
|
|
@ -175,6 +313,7 @@ name = "ub_parser"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chumsky",
|
"chumsky",
|
||||||
|
"insta",
|
||||||
"logos",
|
"logos",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -195,3 +334,34 @@ name = "wasi"
|
||||||
version = "0.10.2+wasi-snapshot-preview1"
|
version = "0.10.2+wasi-snapshot-preview1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yaml-rust"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
|
||||||
|
dependencies = [
|
||||||
|
"linked-hash-map",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -8,3 +8,6 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
chumsky = "0.8.0"
|
chumsky = "0.8.0"
|
||||||
logos = "0.12.0"
|
logos = "0.12.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
insta = "1.13.0"
|
||||||
|
|
|
||||||
|
|
@ -137,6 +137,7 @@ pub enum BinOpKind {
|
||||||
Or,
|
Or,
|
||||||
BitAnd,
|
BitAnd,
|
||||||
BitOr,
|
BitOr,
|
||||||
|
Xor,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
use logos::Logos;
|
use logos::Logos;
|
||||||
|
|
||||||
#[derive(Logos, Debug, PartialEq)]
|
#[derive(Logos, Debug, PartialEq)]
|
||||||
pub enum Token {
|
pub enum Token<'a> {
|
||||||
|
#[regex("//[^\n]*", logos::skip)]
|
||||||
|
Comment,
|
||||||
|
|
||||||
// punctuation
|
// punctuation
|
||||||
#[token("{")]
|
#[token("{")]
|
||||||
BraceO,
|
BraceO,
|
||||||
|
|
@ -17,6 +20,10 @@ pub enum Token {
|
||||||
ParenC,
|
ParenC,
|
||||||
#[token(".")]
|
#[token(".")]
|
||||||
Dot,
|
Dot,
|
||||||
|
#[token(",")]
|
||||||
|
Comma,
|
||||||
|
#[token(";")]
|
||||||
|
Semi,
|
||||||
#[token("=")]
|
#[token("=")]
|
||||||
Eq,
|
Eq,
|
||||||
#[token("==")]
|
#[token("==")]
|
||||||
|
|
@ -41,21 +48,88 @@ pub enum Token {
|
||||||
Plus,
|
Plus,
|
||||||
#[token("-")]
|
#[token("-")]
|
||||||
Minus,
|
Minus,
|
||||||
|
#[token("|")]
|
||||||
|
Or,
|
||||||
|
#[token("&")]
|
||||||
|
And,
|
||||||
|
#[token("||")]
|
||||||
|
OrOr,
|
||||||
|
#[token("&&")]
|
||||||
|
AndAnd,
|
||||||
|
#[token("^")]
|
||||||
|
Caret,
|
||||||
|
|
||||||
// keywords
|
// keywords
|
||||||
#[token("struct")]
|
#[token("struct")]
|
||||||
Struct,
|
Struct,
|
||||||
#[token("fn")]
|
#[token("fn")]
|
||||||
Fn,
|
Fn,
|
||||||
|
#[token("if")]
|
||||||
|
If,
|
||||||
|
#[token("else")]
|
||||||
|
Else,
|
||||||
|
#[token("while")]
|
||||||
|
While,
|
||||||
|
#[token("loop")]
|
||||||
|
Loop,
|
||||||
|
|
||||||
#[regex(r"[a-zA-Z_]\w*")]
|
#[regex(r"[a-zA-Z_]\w*")]
|
||||||
Ident(String),
|
Ident(&'a str),
|
||||||
|
|
||||||
|
#[regex(r##""[^"]*""##)]
|
||||||
|
String(&'a str),
|
||||||
|
|
||||||
|
#[regex(r"\d+")]
|
||||||
|
Integer(&'a str),
|
||||||
|
|
||||||
#[error]
|
#[error]
|
||||||
#[regex(r"[ \t\n\r\f]+"), logos::skip]
|
#[regex(r"[ \t\r\n]+", logos::skip)]
|
||||||
Error,
|
Error,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex(code: &str) -> logos::Lexer<'_, Token> {
|
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
|
||||||
Token::lexer(code)
|
Token::lexer(code)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::lexer::Token;
|
||||||
|
|
||||||
|
fn lex_test(str: &str) -> Vec<Token<'_>> {
|
||||||
|
let lexer = super::lex(str);
|
||||||
|
lexer.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn punctuation() {
|
||||||
|
let tokens = lex_test("{} [] () .,; = == != >= <= < > + - * / | || & && ^");
|
||||||
|
insta::assert_debug_snapshot!(tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn whitespace() {
|
||||||
|
let tokens = lex_test(
|
||||||
|
".
|
||||||
|
\r\n \t .",
|
||||||
|
);
|
||||||
|
insta::assert_debug_snapshot!(tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn idents() {
|
||||||
|
let tokens = lex_test("hello w_world b235_");
|
||||||
|
insta::assert_debug_snapshot!(tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn literals() {
|
||||||
|
let tokens = lex_test(r##""hello friend" 5 "morning" 3263475"##);
|
||||||
|
insta::assert_debug_snapshot!(tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn keywords() {
|
||||||
|
let tokens = lex_test("struct fn . if else while loop;");
|
||||||
|
insta::assert_debug_snapshot!(tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
15
ub_parser/src/snapshots/ub_parser__lexer__tests__idents.snap
Normal file
15
ub_parser/src/snapshots/ub_parser__lexer__tests__idents.snap
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
---
|
||||||
|
source: ub_parser/src/lexer.rs
|
||||||
|
expression: tokens
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Ident(
|
||||||
|
"hello",
|
||||||
|
),
|
||||||
|
Ident(
|
||||||
|
"w_world",
|
||||||
|
),
|
||||||
|
Ident(
|
||||||
|
"b235_",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
---
|
||||||
|
source: ub_parser/src/lexer.rs
|
||||||
|
expression: tokens
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Struct,
|
||||||
|
Fn,
|
||||||
|
Dot,
|
||||||
|
If,
|
||||||
|
Else,
|
||||||
|
While,
|
||||||
|
Loop,
|
||||||
|
Semi,
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
---
|
||||||
|
source: ub_parser/src/lexer.rs
|
||||||
|
expression: tokens
|
||||||
|
---
|
||||||
|
[
|
||||||
|
String(
|
||||||
|
"\"hello friend\"",
|
||||||
|
),
|
||||||
|
Integer(
|
||||||
|
"5",
|
||||||
|
),
|
||||||
|
String(
|
||||||
|
"\"morning\"",
|
||||||
|
),
|
||||||
|
Integer(
|
||||||
|
"3263475",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
---
|
||||||
|
source: ub_parser/src/lexer.rs
|
||||||
|
expression: tokens
|
||||||
|
---
|
||||||
|
[
|
||||||
|
BraceO,
|
||||||
|
BraceC,
|
||||||
|
BracketO,
|
||||||
|
BracketC,
|
||||||
|
ParenO,
|
||||||
|
ParenC,
|
||||||
|
Dot,
|
||||||
|
Comma,
|
||||||
|
Semi,
|
||||||
|
Eq,
|
||||||
|
EqEq,
|
||||||
|
BangEq,
|
||||||
|
GreaterEq,
|
||||||
|
LessEq,
|
||||||
|
Less,
|
||||||
|
Greater,
|
||||||
|
Plus,
|
||||||
|
Minus,
|
||||||
|
Asterisk,
|
||||||
|
Slash,
|
||||||
|
Or,
|
||||||
|
OrOr,
|
||||||
|
And,
|
||||||
|
AndAnd,
|
||||||
|
Caret,
|
||||||
|
]
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
---
|
||||||
|
source: ub_parser/src/lexer.rs
|
||||||
|
expression: tokens
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Dot,
|
||||||
|
Dot,
|
||||||
|
]
|
||||||
|
|
@ -1,5 +1,22 @@
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub struct Span {
|
pub struct Span {
|
||||||
start: usize,
|
start: usize,
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Span {
|
||||||
|
pub fn start_end(start: usize, end: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
start,
|
||||||
|
len: end - start,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Range<usize>> for Span {
|
||||||
|
fn from(r: Range<usize>) -> Self {
|
||||||
|
Self::start_end(r.start, r.end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue