diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..4d7dd9e --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,3 @@ +imports_granularity = "Crate" +newline_style = "Unix" +group_imports = "StdExternalCrate" \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 2f7ff23..2483605 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,10 +2,90 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "console" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "terminal_size", + "winapi", +] + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "hashbrown" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" + +[[package]] +name = "indexmap" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6392766afd7964e2531940894cffe4bd8d7d17dbc3c1c4857040fd4b33bdb3" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "insta" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4126dd76ebfe2561486a1bd6738a33d2029ffb068a99ac446b7f8c77b2e58dbc" +dependencies = [ + "console", + "once_cell", + "serde", + "serde_json", + "serde_yaml", + "similar", +] + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + [[package]] name = "parser" version = "0.1.0" dependencies = [ + "insta", "peekmore", ] @@ -18,12 +98,143 @@ dependencies = [ "smallvec", ] +[[package]] +name = "proc-macro2" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707d15895415db6628332b737c838b88c598522e4dc70647e59b72312924aebc" +dependencies = [ + "indexmap", + "ryu", + "serde", + "yaml-rust", +] + +[[package]] +name = "similar" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" + [[package]] name = "smallvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "syn" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "unicode-ident" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" + [[package]] name = "uwuc" version = "0.1.0" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 7468acb..45dcb69 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -7,3 +7,6 @@ edition = "2021" [dependencies] peekmore = { version = "1.0.0", features = ["smallvec"] } + +[dev-dependencies] +insta = "1.15.0" diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 38f258a..30c1f50 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,3 +1,6 @@ #![allow(dead_code)] -mod lexer; +mod pre; +mod token; + +pub type Span = std::ops::Range; diff --git a/parser/src/lexer.rs b/parser/src/pre/lexer.rs similarity index 96% rename from parser/src/lexer.rs rename to parser/src/pre/lexer.rs index 3284e32..966c7cb 100644 --- a/parser/src/lexer.rs +++ b/parser/src/pre/lexer.rs @@ -7,8 +7,9 @@ use std::ops::Not; use peekmore::PeekMore; -type Span = std::ops::Range; +use crate::Span; +#[derive(Debug)] pub enum PToken { HeaderName(Vec), Identifier(Vec), @@ -20,20 +21,7 @@ pub enum PToken { Error, } -pub enum Token { - Keyword(Keyword), - Identifier(), - Constant(), - StringLiteral(), - Punctuator(Punctuator), -} - -pub enum Keyword {} - -pub enum Constant { - Integer(i64), -} - +#[derive(Debug)] pub enum Punctuator { /// [ <: BracketOpen, @@ -363,3 +351,23 @@ pub fn preprocess_tokens( }; lexer } + +#[cfg(test)] +mod tests { + fn lex_test(str: &str) { + let bytes = str.bytes().enumerate(); + let tokens = super::preprocess_tokens(bytes); + let tokens = tokens.collect::>(); + insta::assert_debug_snapshot!(tokens); + } + + #[test] + fn hello_world() { + let src = r#"\ +int main() { + puts("Hello, World!"); +} +"#; + lex_test(src); + } +} diff --git a/parser/src/pre/mod.rs b/parser/src/pre/mod.rs new file mode 100644 index 0000000..f711db4 --- /dev/null +++ b/parser/src/pre/mod.rs @@ -0,0 +1,3 @@ +mod lexer; + +pub use lexer::{preprocess_tokens, PToken, Punctuator}; diff --git a/parser/src/token.rs b/parser/src/token.rs new file mode 100644 index 0000000..4a383ab --- /dev/null +++ b/parser/src/token.rs @@ -0,0 +1,21 @@ +use crate::{ + pre::{PToken, Punctuator}, + Span, +}; + +pub enum Token { + Keyword(Keyword), + Identifier(), + Constant(), + StringLiteral(), + Punctuator(Punctuator), + Error, +} + +pub struct Keyword; + +fn from_pre_toks( + pre_toks: impl Iterator, +) -> impl IntoIterator { + pre_toks.map(|token| todo!()) +}