This commit is contained in:
nora 2022-06-21 09:04:00 +02:00
parent 594d491979
commit ca33e5f0c5
4 changed files with 230 additions and 13 deletions

11
Cargo.lock generated Normal file
View file

@ -0,0 +1,11 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "parser"
version = "0.1.0"
[[package]]
name = "uwuc"
version = "0.1.0"

View file

@ -1,3 +1,6 @@
[workspace]
members = [".", "parser"]
[package] [package]
name = "uwuc" name = "uwuc"
version = "0.1.0" version = "0.1.0"

214
parser/src/lexer.rs Normal file
View file

@ -0,0 +1,214 @@
//! Contrary to popular belief, Dennis Ritchie did not invent the C grammar.
//! The C grammar was brought to Dennis Ritchie by a demon in his worst dreams.
/// A preprocessing token, following the `preprocessing-token` grammar listed
/// on the lexer's `next` method.
///
/// Variants that carry a `Vec<u8>` hold raw bytes taken from the source.
pub enum PToken {
/// A header name. Not produced by any code visible in this file yet.
HeaderName(Vec<u8>),
/// An identifier; the payload is the identifier's bytes.
Identifier(Vec<u8>),
/// A preprocessing number; the payload is the number's bytes.
PpNumber(Vec<u8>),
/// A character constant. Carries no payload and is not produced by any code
/// visible in this file yet.
CharConstant,
/// A string literal; the payload holds the bytes collected by the lexer.
StringLiteral(Vec<u8>),
/// A punctuator.
Punctuator(Punctuator),
/// Any non-whitespace byte that did not start one of the other tokens.
OtherNonWs(u8),
/// Lexing failed, e.g. the input ended in the middle of a string literal.
Error,
}
/// A token, as opposed to a preprocessing token ([`PToken`]).
///
/// NOTE(review): presumably the token type for the phases after
/// preprocessing; nothing visible in this file constructs it yet.
/// `Identifier`, `Constant` and `StringLiteral` are empty tuple variants, i.e.
/// placeholders that do not carry any data so far.
pub enum Token {
/// A keyword. Cannot be constructed while [`Keyword`] has no variants.
Keyword(Keyword),
/// An identifier (no payload yet).
Identifier(),
/// A constant (no payload yet; does not wrap the [`Constant`] enum so far).
Constant(),
/// A string literal (no payload yet).
StringLiteral(),
/// A punctuator.
Punctuator(Punctuator),
}
/// Keywords of the language.
///
/// No variants are defined yet, so a value of this type (and therefore
/// `Token::Keyword`) cannot be constructed.
pub enum Keyword {}
/// A constant value. Only integer constants are modelled so far.
pub enum Constant {
/// An integer constant, stored as an `i64`.
Integer(i64),
}
/// 6.4.6 Punctuators
///
/// Each variant documents the spelling(s) it stands for; where a digraph
/// exists (`<:`, `:>`, `<%`, `%>`, `%:`, `%:%:`) it is listed next to the
/// primary spelling.
///
/// NOTE(review): the variant names `AmspersandEqual` and `Comman` are
/// misspelled. They are kept as-is so existing users keep compiling; rename
/// them to `AmpersandEqual` / `Comma` together with all call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Punctuator {
    /// `[` or `<:`
    BracketOpen,
    /// `]` or `:>`
    BracketClose,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `{` or `<%`
    BraceOpen,
    /// `}` or `%>`
    BraceClose,
    /// `.`
    Dot,
    /// `->`
    Arrow,
    /// `++`
    PlusPlus,
    /// `--`
    MinusMinus,
    /// `&`
    Ampersand,
    /// `*`
    Asterisk,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `~`
    Tilde,
    /// `!` 🔫
    Bang,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `<<`
    LeftLeftChevron,
    /// `>>`
    RightRightChevron,
    /// `<`
    LeftChevron,
    /// `>`
    RightChevron,
    /// `<=`
    LeftChevronEqual,
    /// `>=`
    RightChevronEqual,
    /// `==`
    EqualEqual,
    /// `!=`
    BangEqual,
    /// `^`
    Caret,
    /// `|`
    Pipe,
    /// `&&`
    AmpersandAmpersand,
    /// `||`
    PipePipe,
    /// `?`
    QuestionMark,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `...`
    DotDotDot,
    /// `=`
    Equal,
    /// `*=`
    AsteriskEqual,
    /// `/=`
    SlashEqual,
    /// `%=`
    PercentEqual,
    /// `+=`
    PlusEqual,
    /// `-=`
    MinusEqual,
    /// `<<=`
    LeftLeftChevronEqual,
    /// `>>=`
    RightRightChevronEqual,
    /// `&=` (name is a misspelling of `AmpersandEqual`, kept for compatibility)
    AmspersandEqual,
    /// `^=`
    CaretEqual,
    /// `|=`
    PipeEqual,
    /// `,` (name is a misspelling of `Comma`, kept for compatibility)
    Comman,
    /// `#` or `%:`
    Hash,
    /// `##` or `%:%:`
    HashHash,
}
/// Lexer that turns a stream of source bytes into preprocessing tokens
/// ([`PToken`]).
///
/// The `I: Iterator` bound has to be stated on the struct itself because
/// `std::iter::Peekable<I>` requires it.
struct PLexer<I>
where
I: Iterator<Item = u8>,
{
/// Byte source, wrapped in `Peekable` so the lexer can look at the next
/// byte without consuming it.
src: std::iter::Peekable<I>,
}
impl<I> PLexer<I>
where
    I: Iterator<Item = u8>,
{
    /// 6.4.2 Identifiers
    ///
    /// Lexes an identifier whose first byte `c` has already been taken from
    /// the source by the caller. Consumes every following ASCII letter, digit
    /// or `_` and returns all of them (including `c`) as
    /// `PToken::Identifier`. The first byte that does not belong to the
    /// identifier is left in the source.
    ///
    /// TODO: 6.4.3 Universal character names
    fn identifier(&mut self, c: u8) -> PToken {
        let mut ident = vec![c];
        while let Some(next) = self
            .src
            .next_if(|b| b.is_ascii_alphanumeric() || *b == b'_')
        {
            ident.push(next);
        }
        PToken::Identifier(ident)
    }

    /// 6.4.8 Preprocessing numbers
    ///
    /// Lexes a preprocessing number whose first byte `c` (a digit) has already
    /// been taken from the source by the caller. Following the pp-number
    /// grammar, the number continues with any digit, identifier-nondigit
    /// (ASCII letter or `_`) or `.`, and with a `+`/`-` sign when that sign
    /// directly follows one of `e`, `E`, `p`, `P`. This keeps spellings such
    /// as `0x1F`, `1.5f`, `10UL` and `1e+10` together as a single token.
    ///
    /// Returns the collected bytes (including `c`) as `PToken::PpNumber`.
    ///
    /// TODO: 6.4.3 Universal character names
    fn number(&mut self, c: u8) -> PToken {
        let mut number = vec![c];
        // The previously accepted byte decides whether a sign may follow: a
        // sign is only part of the number right after an exponent marker.
        let mut last = c;
        while let Some(next) = self.src.next_if(|&b| {
            b.is_ascii_alphanumeric()
                || b == b'_'
                || b == b'.'
                || (matches!(b, b'+' | b'-') && matches!(last, b'e' | b'E' | b'p' | b'P'))
        }) {
            number.push(next);
            last = next;
        }
        PToken::PpNumber(number)
    }

    /// 6.4.5 String literals
    ///
    /// Lexes the remainder of a string literal; the opening `"` has already
    /// been taken from the source by the caller. Bytes are collected up to the
    /// closing `"`, which is consumed but not stored.
    ///
    /// A backslash and the byte after it are stored verbatim, so an escaped
    /// quote (`\"`) does not end the literal. Escape sequences are not
    /// interpreted here.
    ///
    /// Returns `PToken::StringLiteral` with the bytes between the quotes, or
    /// `PToken::Error` if the source ends before the closing `"`.
    fn string_literal(&mut self) -> PToken {
        let mut string = Vec::new();
        loop {
            match self.src.next() {
                // Unterminated literal.
                None => return PToken::Error,
                Some(b'"') => return PToken::StringLiteral(string),
                Some(b'\\') => {
                    string.push(b'\\');
                    match self.src.next() {
                        Some(escaped) => string.push(escaped),
                        None => return PToken::Error,
                    }
                }
                Some(c) => string.push(c),
            }
        }
    }
}
impl<I> Iterator for PLexer<I>
where
    I: Iterator<Item = u8>,
{
    type Item = PToken;

    /// Produces the next preprocessing token, or `None` once the byte source
    /// is exhausted. ASCII whitespace between tokens is skipped.
    ///
    /// preprocessing-token:
    ///   header-name
    ///   identifier
    ///   pp-number
    ///   character-constant
    ///   string-literal
    ///   punctuator
    ///   each non-white-space character that cannot be one of the above
    ///
    /// Only identifiers, pp-numbers starting with a digit, string literals and
    /// the `[` punctuator are recognised so far; every other non-whitespace
    /// byte is returned as `PToken::OtherNonWs`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            match self.src.next()? {
                c @ (b'a'..=b'z' | b'A'..=b'Z' | b'_') => return Some(self.identifier(c)),
                c @ b'0'..=b'9' => return Some(self.number(c)),
                b'"' => return Some(self.string_literal()),
                // `[` is a bracket; `BraceOpen` is reserved for `{`.
                b'[' => return Some(PToken::Punctuator(Punctuator::BracketOpen)),
                // Whitespace separates tokens but is not a token itself.
                c if c.is_ascii_whitespace() => {}
                c => return Some(PToken::OtherNonWs(c)),
            }
        }
    }
}

View file

@ -1,14 +1,3 @@
pub fn add(left: usize, right: usize) -> usize { #![allow(dead_code)]
left + right
}
#[cfg(test)] mod lexer;
mod tests {
use super::*;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}