lex things

This commit is contained in:
nora 2022-06-21 13:15:26 +02:00
parent cc8eb57148
commit a5d063b944
7 changed files with 268 additions and 16 deletions

3
.rustfmt.toml Normal file
View file

@ -0,0 +1,3 @@
imports_granularity = "Crate"
newline_style = "Unix"
group_imports = "StdExternalCrate"

211
Cargo.lock generated
View file

@ -2,10 +2,90 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "console"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"terminal_size",
"winapi",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "hashbrown"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3"
[[package]]
name = "indexmap"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c6392766afd7964e2531940894cffe4bd8d7d17dbc3c1c4857040fd4b33bdb3"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "insta"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4126dd76ebfe2561486a1bd6738a33d2029ffb068a99ac446b7f8c77b2e58dbc"
dependencies = [
"console",
"once_cell",
"serde",
"serde_json",
"serde_yaml",
"similar",
]
[[package]]
name = "itoa"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "linked-hash-map"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
[[package]]
name = "once_cell"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
[[package]]
name = "parser"
version = "0.1.0"
dependencies = [
"insta",
"peekmore",
]
@ -18,12 +98,143 @@ dependencies = [
"smallvec",
]
[[package]]
name = "proc-macro2"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
[[package]]
name = "serde"
version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707d15895415db6628332b737c838b88c598522e4dc70647e59b72312924aebc"
dependencies = [
"indexmap",
"ryu",
"serde",
"yaml-rust",
]
[[package]]
name = "similar"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
[[package]]
name = "smallvec"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]]
name = "syn"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "terminal_size"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]]
name = "uwuc"
version = "0.1.0"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]

View file

@ -7,3 +7,6 @@ edition = "2021"
[dependencies]
peekmore = { version = "1.0.0", features = ["smallvec"] }
[dev-dependencies]
insta = "1.15.0"

View file

@ -1,3 +1,6 @@
#![allow(dead_code)]
mod lexer;
mod pre;
mod token;
pub type Span = std::ops::Range<usize>;

View file

@ -7,8 +7,9 @@ use std::ops::Not;
use peekmore::PeekMore;
type Span = std::ops::Range<usize>;
use crate::Span;
#[derive(Debug)]
pub enum PToken {
HeaderName(Vec<u8>),
Identifier(Vec<u8>),
@ -20,20 +21,7 @@ pub enum PToken {
Error,
}
pub enum Token {
Keyword(Keyword),
Identifier(),
Constant(),
StringLiteral(),
Punctuator(Punctuator),
}
pub enum Keyword {}
pub enum Constant {
Integer(i64),
}
#[derive(Debug)]
pub enum Punctuator {
/// [ <:
BracketOpen,
@ -363,3 +351,23 @@ pub fn preprocess_tokens(
};
lexer
}
#[cfg(test)]
mod tests {
fn lex_test(str: &str) {
let bytes = str.bytes().enumerate();
let tokens = super::preprocess_tokens(bytes);
let tokens = tokens.collect::<Vec<_>>();
insta::assert_debug_snapshot!(tokens);
}
#[test]
fn hello_world() {
let src = r#"\
int main() {
puts("Hello, World!");
}
"#;
lex_test(src);
}
}

3
parser/src/pre/mod.rs Normal file
View file

@ -0,0 +1,3 @@
mod lexer;
pub use lexer::{preprocess_tokens, PToken, Punctuator};

21
parser/src/token.rs Normal file
View file

@ -0,0 +1,21 @@
use crate::{
pre::{PToken, Punctuator},
Span,
};
pub enum Token {
Keyword(Keyword),
Identifier(),
Constant(),
StringLiteral(),
Punctuator(Punctuator),
Error,
}
pub struct Keyword;
fn from_pre_toks(
pre_toks: impl Iterator<Item = (PToken, Span)>,
) -> impl IntoIterator<Item = (Token, Span)> {
pre_toks.map(|token| todo!())
}