Implement the pre-token -> token conversion in parser/src/token.rs.

  * PToken::CharConstant now carries its u8 value.
  * Token, Keyword and Constant derive Debug; Constant gains the Int(i128),
    Float(f64) and Char(u8) variants.
  * pp_number_to_constant parses a pp-number into Constant::Int and yields
    None when the parse fails.
  * pre_tokens_to_tokens maps every (PToken, Span) pair to a (Token, Span)
    pair: identifiers become keywords when ident_to_keyword matches,
    pp-numbers that fail to parse become Token::Error, OtherNonWs and Error
    become Token::Error, and HeaderName still hits todo!().
  * Adds the hello_world insta snapshot test and its snapshot file.

diff --git a/parser/src/pre/lexer.rs b/parser/src/pre/lexer.rs
index 80e1eee..f1d09d6 100644
--- a/parser/src/pre/lexer.rs
+++ b/parser/src/pre/lexer.rs
@@ -14,7 +14,7 @@ pub enum PToken<'src> {
     HeaderName(&'src str),
     Identifier(&'src str),
     PpNumber(&'src str),
-    CharConstant,
+    CharConstant(u8),
     StringLiteral(&'src str),
     Punctuator(Punctuator),
     OtherNonWs(u8),
diff --git a/parser/src/snapshots/parser__token__tests__hello_world.snap b/parser/src/snapshots/parser__token__tests__hello_world.snap
new file mode 100644
index 0000000..7648405
--- /dev/null
+++ b/parser/src/snapshots/parser__token__tests__hello_world.snap
@@ -0,0 +1,72 @@
+---
+source: parser/src/token.rs
+expression: tokens
+---
+[
+    (
+        Keyword(
+            Int,
+        ),
+        1..4,
+    ),
+    (
+        Identifier(
+            "main",
+        ),
+        5..9,
+    ),
+    (
+        Punctuator(
+            ParenOpen,
+        ),
+        9..10,
+    ),
+    (
+        Punctuator(
+            ParenClose,
+        ),
+        10..11,
+    ),
+    (
+        Punctuator(
+            BraceOpen,
+        ),
+        12..13,
+    ),
+    (
+        Identifier(
+            "puts",
+        ),
+        18..22,
+    ),
+    (
+        Punctuator(
+            ParenOpen,
+        ),
+        22..23,
+    ),
+    (
+        StringLiteral(
+            "Hello, World!",
+        ),
+        23..37,
+    ),
+    (
+        Punctuator(
+            ParenClose,
+        ),
+        38..39,
+    ),
+    (
+        Punctuator(
+            Semicolon,
+        ),
+        39..40,
+    ),
+    (
+        Punctuator(
+            BraceClose,
+        ),
+        41..42,
+    ),
+]
diff --git a/parser/src/token.rs b/parser/src/token.rs
index 88fded6..296ee5f 100644
--- a/parser/src/token.rs
+++ b/parser/src/token.rs
@@ -9,6 +9,7 @@ use crate::{
 /// constant
 /// string-literal
 /// punctuator
+#[derive(Debug)]
 pub enum Token<'src> {
     Keyword(Keyword),
     Identifier(&'src str),
@@ -18,6 +19,7 @@ pub enum Token<'src> {
     Error,
 }
 
+#[derive(Debug)]
 pub enum Keyword {
     Auto,
     Break,
@@ -65,7 +67,13 @@ pub enum Keyword {
     ThreadLocal,
 }
 
-pub enum Constant {}
+#[derive(Debug)]
+pub enum Constant {
+    Int(i128),
+    Float(f64),
+    Char(u8),
+    // adding enumerations here makes no sense.
+}
 
 fn ident_to_keyword(ident: &str) -> Option<Keyword> {
     match ident {
@@ -117,8 +125,52 @@ fn ident_to_keyword(ident: &str) -> Option<Keyword> {
     }
 }
 
+fn pp_number_to_constant(number: &str) -> Option<Constant> {
+    let n = number.parse().ok()?;
+    Some(Constant::Int(n))
+}
+
 pub fn pre_tokens_to_tokens<'src>(
     pre_tokens: impl Iterator<Item = (PToken<'src>, Span)>,
 ) -> impl Iterator<Item = (Token<'src>, Span)> {
-    pre_tokens.map(|_| todo!())
+    pre_tokens.map(|(token, span)| {
+        let token = match token {
+            PToken::HeaderName(_) => todo!("header names aren't real, wake up"),
+            PToken::Identifier(ident) => match ident_to_keyword(ident) {
+                Some(keyword) => Token::Keyword(keyword),
+                None => Token::Identifier(ident),
+            },
+            PToken::PpNumber(number) => pp_number_to_constant(number)
+                .map(Token::Constant)
+                .unwrap_or(Token::Error),
+            PToken::CharConstant(u8) => Token::Constant(Constant::Char(u8)),
+            PToken::StringLiteral(lit) => Token::StringLiteral(lit),
+            PToken::Punctuator(p) => Token::Punctuator(p),
+            PToken::OtherNonWs(_) => Token::Error,
+            PToken::Error => Token::Error,
+        };
+        (token, span)
+    })
 }
+
+#[cfg(test)]
+mod tests {
+    macro_rules! lex_test {
+        ($src:expr) => {
+            let pre_tokens = crate::pre::preprocess_tokens($src);
+            let tokens = super::pre_tokens_to_tokens(pre_tokens);
+            let tokens = tokens.collect::<Vec<_>>();
+            insta::assert_debug_snapshot!(tokens);
+        };
+    }
+
+    #[test]
+    fn hello_world() {
+        let src = r#"
+int main() {
+    puts("Hello, World!");
+}
+"#;
+        lex_test!(src);
+    }
+}
\ No newline at end of file