lexer tokens

nora 2022-06-21 19:23:03 +02:00
parent aa8f6a91b2
commit 2e5dfb24e2
3 changed files with 127 additions and 3 deletions

View file

@@ -14,7 +14,7 @@ pub enum PToken<'src> {
    HeaderName(&'src str),
    Identifier(&'src str),
    PpNumber(&'src str),
    CharConstant,
    CharConstant(u8),
    StringLiteral(&'src str),
    Punctuator(Punctuator),
    OtherNonWs(u8),
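CharConstant now carries the decoded byte value instead of being a unit variant. The preprocessing lexer that produces it is not part of this diff; purely as an illustration, a decoder for single-byte character constants with a few common escapes might look like the hypothetical decode_char_constant below (assumed name and behavior, not the project's actual code).

// Hypothetical sketch, not part of this commit: decode the body of a simple
// character constant (the text between the quotes) into the u8 that
// PToken::CharConstant now carries. Only single-byte constants and a few
// common escapes are handled.
fn decode_char_constant(body: &str) -> Option<u8> {
    let mut chars = body.chars();
    let value = match chars.next()? {
        '\\' => match chars.next()? {
            'n' => b'\n',
            't' => b'\t',
            '0' => 0,
            '\\' => b'\\',
            '\'' => b'\'',
            _ => return None, // other escapes are out of scope for this sketch
        },
        c if c.is_ascii() => c as u8,
        _ => return None, // multi-byte characters do not fit in a u8
    };
    // Anything left over means the constant was longer than one character.
    chars.next().is_none().then_some(value)
}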

View file

@@ -0,0 +1,72 @@
---
source: parser/src/token.rs
expression: tokens
---
[
    (
        Keyword(
            Int,
        ),
        1..4,
    ),
    (
        Identifier(
            "main",
        ),
        5..9,
    ),
    (
        Punctuator(
            ParenOpen,
        ),
        9..10,
    ),
    (
        Punctuator(
            ParenClose,
        ),
        10..11,
    ),
    (
        Punctuator(
            BraceOpen,
        ),
        12..13,
    ),
    (
        Identifier(
            "puts",
        ),
        18..22,
    ),
    (
        Punctuator(
            ParenOpen,
        ),
        22..23,
    ),
    (
        StringLiteral(
            "Hello, World!",
        ),
        23..37,
    ),
    (
        Punctuator(
            ParenClose,
        ),
        38..39,
    ),
    (
        Punctuator(
            Semicolon,
        ),
        39..40,
    ),
    (
        Punctuator(
            BraceClose,
        ),
        41..42,
    ),
]
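For orientation: the ranges in this snapshot read as byte offsets into the test source from the third file below, which starts with a newline inside the raw string. A minimal standalone check of that reading (not part of the commit):

// Assumed interpretation: each span is a byte range into the original source.
fn main() {
    let src = "\nint main() {\n    puts(\"Hello, World!\");\n}\n";
    assert_eq!(&src[1..4], "int");    // Keyword(Int), span 1..4
    assert_eq!(&src[5..9], "main");   // Identifier("main"), span 5..9
    assert_eq!(&src[18..22], "puts"); // Identifier("puts"), span 18..22
    assert_eq!(&src[41..42], "}");    // Punctuator(BraceClose), span 41..42
}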

View file

@@ -9,6 +9,7 @@ use crate::{
/// constant
/// string-literal
/// punctuator
#[derive(Debug)]
pub enum Token<'src> {
    Keyword(Keyword),
    Identifier(&'src str),
@@ -18,6 +19,7 @@ pub enum Token<'src> {
    Error,
}

#[derive(Debug)]
pub enum Keyword {
    Auto,
    Break,
@@ -65,7 +67,13 @@ pub enum Keyword {
    ThreadLocal,
}

pub enum Constant {}
#[derive(Debug)]
pub enum Constant {
    Int(i128),
    Float(f64),
    Char(u8),
    // adding enumerations here makes no sense.
}

fn ident_to_keyword(ident: &str) -> Option<Keyword> {
    match ident {
@@ -117,8 +125,52 @@ fn ident_to_keyword(ident: &str) -> Option<Keyword> {
    }
}

fn pp_number_to_constant(number: &str) -> Option<Constant> {
    let n = number.parse().ok()?;
    Some(Constant::Int(n))
}

pub fn pre_tokens_to_tokens<'src>(
    pre_tokens: impl Iterator<Item = (PToken<'src>, Span)>,
) -> impl Iterator<Item = (Token<'src>, Span)> {
    pre_tokens.map(|_| todo!())
    pre_tokens.map(|(token, span)| {
        let token = match token {
            PToken::HeaderName(_) => todo!("header names aren't real, wake up"),
            PToken::Identifier(ident) => match ident_to_keyword(ident) {
                Some(keyword) => Token::Keyword(keyword),
                None => Token::Identifier(ident),
            },
            PToken::PpNumber(number) => pp_number_to_constant(number)
                .map(Token::Constant)
                .unwrap_or(Token::Error),
            PToken::CharConstant(c) => Token::Constant(Constant::Char(c)),
            PToken::StringLiteral(lit) => Token::StringLiteral(lit),
            PToken::Punctuator(p) => Token::Punctuator(p),
            PToken::OtherNonWs(_) => Token::Error,
            PToken::Error => Token::Error,
        };
        (token, span)
    })
}
#[cfg(test)]
mod tests {
    macro_rules! lex_test {
        ($src:expr) => {
            let pre_tokens = crate::pre::preprocess_tokens($src);
            let tokens = super::pre_tokens_to_tokens(pre_tokens);
            let tokens = tokens.collect::<Vec<_>>();
            insta::assert_debug_snapshot!(tokens);
        };
    }

    #[test]
    fn hello_world() {
        let src = r#"
int main() {
    puts("Hello, World!");
}
"#;
        lex_test!(src);
    }
}
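As committed, pp_number_to_constant accepts only plain decimal integers (str::parse into i128); any other pp-number, such as a hexadecimal or floating constant, falls through to Token::Error. A hypothetical extension, sketched against a local copy of Constant so it stands alone, might start like this:

// Hypothetical sketch, not part of this commit. It mirrors the Constant enum
// from the diff so the example compiles on its own.
#[derive(Debug)]
enum Constant {
    Int(i128),
    Float(f64),
}

fn pp_number_to_constant(number: &str) -> Option<Constant> {
    // Hexadecimal integer constants such as 0x2A.
    if let Some(hex) = number
        .strip_prefix("0x")
        .or_else(|| number.strip_prefix("0X"))
    {
        return i128::from_str_radix(hex, 16).ok().map(Constant::Int);
    }
    // Treat anything with a decimal point or exponent as a floating constant.
    if number.contains(|c: char| matches!(c, '.' | 'e' | 'E')) {
        return number.parse().ok().map(Constant::Float);
    }
    // Plain decimal integers, as in the committed version.
    number.parse().ok().map(Constant::Int)
}

Integer and float suffixes (42u, 1.5f) and octal forms would still need dedicated handling; this only illustrates the general shape of widening the conversion.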