mirror of
https://github.com/Noratrieb/ub.git
synced 2026-01-14 08:35:06 +01:00
lexer!
This commit is contained in:
parent
888fcfd170
commit
9e87a4ce91
10 changed files with 355 additions and 4 deletions
170
Cargo.lock
generated
170
Cargo.lock
generated
|
|
@ -11,6 +11,12 @@ dependencies = [
|
|||
"const-random",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.1"
|
||||
|
|
@ -32,6 +38,19 @@ dependencies = [
|
|||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
|
||||
dependencies = [
|
||||
"encode_unicode",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"terminal_size",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-random"
|
||||
version = "0.1.13"
|
||||
|
|
@ -60,6 +79,12 @@ version = "0.2.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
|
|
@ -77,6 +102,42 @@ dependencies = [
|
|||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "insta"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30a7e1911532a662f6b08b68f884080850f2fd9544963c3ab23a5af42bda1eac"
|
||||
dependencies = [
|
||||
"console",
|
||||
"once_cell",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"similar",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
|
@ -89,6 +150,12 @@ version = "0.2.121"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
|
||||
|
||||
[[package]]
|
||||
name = "logos"
|
||||
version = "0.12.0"
|
||||
|
|
@ -113,6 +180,12 @@ dependencies = [
|
|||
"utf8-ranges",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
|
|
@ -143,6 +216,61 @@ version = "0.6.25"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.136"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.136"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.8.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"ryu",
|
||||
"serde",
|
||||
"yaml-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "similar"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.89"
|
||||
|
|
@ -154,6 +282,16 @@ dependencies = [
|
|||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "terminal_size"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tiny-keccak"
|
||||
version = "2.0.2"
|
||||
|
|
@ -175,6 +313,7 @@ name = "ub_parser"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"insta",
|
||||
"logos",
|
||||
]
|
||||
|
||||
|
|
@ -195,3 +334,34 @@ name = "wasi"
|
|||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
|
||||
dependencies = [
|
||||
"linked-hash-map",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -8,3 +8,6 @@ edition = "2021"
|
|||
[dependencies]
|
||||
chumsky = "0.8.0"
|
||||
logos = "0.12.0"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.13.0"
|
||||
|
|
|
|||
|
|
@ -137,6 +137,7 @@ pub enum BinOpKind {
|
|||
Or,
|
||||
BitAnd,
|
||||
BitOr,
|
||||
Xor,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
use logos::Logos;
|
||||
|
||||
#[derive(Logos, Debug, PartialEq)]
|
||||
pub enum Token {
|
||||
pub enum Token<'a> {
|
||||
#[regex("//[^\n]*", logos::skip)]
|
||||
Comment,
|
||||
|
||||
// punctuation
|
||||
#[token("{")]
|
||||
BraceO,
|
||||
|
|
@ -17,6 +20,10 @@ pub enum Token {
|
|||
ParenC,
|
||||
#[token(".")]
|
||||
Dot,
|
||||
#[token(",")]
|
||||
Comma,
|
||||
#[token(";")]
|
||||
Semi,
|
||||
#[token("=")]
|
||||
Eq,
|
||||
#[token("==")]
|
||||
|
|
@ -41,21 +48,88 @@ pub enum Token {
|
|||
Plus,
|
||||
#[token("-")]
|
||||
Minus,
|
||||
#[token("|")]
|
||||
Or,
|
||||
#[token("&")]
|
||||
And,
|
||||
#[token("||")]
|
||||
OrOr,
|
||||
#[token("&&")]
|
||||
AndAnd,
|
||||
#[token("^")]
|
||||
Caret,
|
||||
|
||||
// keywords
|
||||
#[token("struct")]
|
||||
Struct,
|
||||
#[token("fn")]
|
||||
Fn,
|
||||
#[token("if")]
|
||||
If,
|
||||
#[token("else")]
|
||||
Else,
|
||||
#[token("while")]
|
||||
While,
|
||||
#[token("loop")]
|
||||
Loop,
|
||||
|
||||
#[regex(r"[a-zA-Z_]\w*")]
|
||||
Ident(String),
|
||||
Ident(&'a str),
|
||||
|
||||
#[regex(r##""[^"]*""##)]
|
||||
String(&'a str),
|
||||
|
||||
#[regex(r"\d+")]
|
||||
Integer(&'a str),
|
||||
|
||||
#[error]
|
||||
#[regex(r"[ \t\n\r\f]+"), logos::skip]
|
||||
#[regex(r"[ \t\r\n]+", logos::skip)]
|
||||
Error,
|
||||
}
|
||||
|
||||
pub fn lex(code: &str) -> logos::Lexer<'_, Token> {
|
||||
pub fn lex<'src>(code: &'src str) -> logos::Lexer<'_, Token<'src>> {
|
||||
Token::lexer(code)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::lexer::Token;
|
||||
|
||||
fn lex_test(str: &str) -> Vec<Token<'_>> {
|
||||
let lexer = super::lex(str);
|
||||
lexer.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn punctuation() {
|
||||
let tokens = lex_test("{} [] () .,; = == != >= <= < > + - * / | || & && ^");
|
||||
insta::assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn whitespace() {
|
||||
let tokens = lex_test(
|
||||
".
|
||||
\r\n \t .",
|
||||
);
|
||||
insta::assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn idents() {
|
||||
let tokens = lex_test("hello w_world b235_");
|
||||
insta::assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn literals() {
|
||||
let tokens = lex_test(r##""hello friend" 5 "morning" 3263475"##);
|
||||
insta::assert_debug_snapshot!(tokens);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keywords() {
|
||||
let tokens = lex_test("struct fn . if else while loop;");
|
||||
insta::assert_debug_snapshot!(tokens);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
15
ub_parser/src/snapshots/ub_parser__lexer__tests__idents.snap
Normal file
15
ub_parser/src/snapshots/ub_parser__lexer__tests__idents.snap
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
source: ub_parser/src/lexer.rs
|
||||
expression: tokens
|
||||
---
|
||||
[
|
||||
Ident(
|
||||
"hello",
|
||||
),
|
||||
Ident(
|
||||
"w_world",
|
||||
),
|
||||
Ident(
|
||||
"b235_",
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
---
|
||||
source: ub_parser/src/lexer.rs
|
||||
expression: tokens
|
||||
---
|
||||
[
|
||||
Struct,
|
||||
Fn,
|
||||
Dot,
|
||||
If,
|
||||
Else,
|
||||
While,
|
||||
Loop,
|
||||
Semi,
|
||||
]
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
source: ub_parser/src/lexer.rs
|
||||
expression: tokens
|
||||
---
|
||||
[
|
||||
String(
|
||||
"\"hello friend\"",
|
||||
),
|
||||
Integer(
|
||||
"5",
|
||||
),
|
||||
String(
|
||||
"\"morning\"",
|
||||
),
|
||||
Integer(
|
||||
"3263475",
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
---
|
||||
source: ub_parser/src/lexer.rs
|
||||
expression: tokens
|
||||
---
|
||||
[
|
||||
BraceO,
|
||||
BraceC,
|
||||
BracketO,
|
||||
BracketC,
|
||||
ParenO,
|
||||
ParenC,
|
||||
Dot,
|
||||
Comma,
|
||||
Semi,
|
||||
Eq,
|
||||
EqEq,
|
||||
BangEq,
|
||||
GreaterEq,
|
||||
LessEq,
|
||||
Less,
|
||||
Greater,
|
||||
Plus,
|
||||
Minus,
|
||||
Asterisk,
|
||||
Slash,
|
||||
Or,
|
||||
OrOr,
|
||||
And,
|
||||
AndAnd,
|
||||
Caret,
|
||||
]
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
source: ub_parser/src/lexer.rs
|
||||
expression: tokens
|
||||
---
|
||||
[
|
||||
Dot,
|
||||
Dot,
|
||||
]
|
||||
|
|
@ -1,5 +1,22 @@
|
|||
use std::ops::Range;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct Span {
|
||||
start: usize,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn start_end(start: usize, end: usize) -> Self {
|
||||
Self {
|
||||
start,
|
||||
len: end - start,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Range<usize>> for Span {
|
||||
fn from(r: Range<usize>) -> Self {
|
||||
Self::start_end(r.start, r.end)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue