no terrible cloning!

This commit is contained in:
nora 2022-06-21 13:46:51 +02:00
parent 7fc10f3b6c
commit d12fa25df9
8 changed files with 63 additions and 96 deletions

3
Cargo.lock generated
View file

@ -207,6 +207,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]] [[package]]
name = "uwuc" name = "uwuc"
version = "0.1.0" version = "0.1.0"
dependencies = [
"parser",
]
[[package]] [[package]]
name = "winapi" name = "winapi"

View file

@ -1,5 +1,5 @@
[workspace] [workspace]
members = [".", "parser"] members = [".", "./parser"]
[package] [package]
name = "uwuc" name = "uwuc"
@ -9,3 +9,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
parser = { path = "./parser" }

View file

@ -1,6 +1,11 @@
#![allow(dead_code)] #![allow(dead_code)]
#![warn(rust_2018_idioms)]
mod pre; mod pre;
mod token; mod token;
pub type Span = std::ops::Range<usize>; pub type Span = std::ops::Range<usize>;
/// Takes the full text of a source file as `src`.
///
/// At present this only writes `src` to standard output, followed by a newline.
pub fn parse_file(src: &str) {
    println!("{}", src);
}

View file

@ -10,12 +10,12 @@ use peekmore::PeekMore;
use crate::Span; use crate::Span;
#[derive(Debug)] #[derive(Debug)]
pub enum PToken { pub enum PToken<'src> {
HeaderName(Vec<u8>), HeaderName(&'src str),
Identifier(Vec<u8>), Identifier(&'src str),
PpNumber(Vec<u8>), PpNumber(&'src str),
CharConstant, CharConstant,
StringLiteral(Vec<u8>), StringLiteral(&'src str),
Punctuator(Punctuator), Punctuator(Punctuator),
OtherNonWs(u8), OtherNonWs(u8),
Error, Error,
@ -119,58 +119,62 @@ pub enum Punctuator {
HashHash, HashHash,
} }
struct PLexer<I> struct PLexer<'src, I>
where where
I: Iterator<Item = (usize, u8)>, I: Iterator<Item = (usize, u8)>,
{ {
src_str: &'src str,
src: peekmore::PeekMoreIterator<I>, src: peekmore::PeekMoreIterator<I>,
} }
impl<I> PLexer<I> impl<'src, I> PLexer<'src, I>
where where
I: Iterator<Item = (usize, u8)>, I: Iterator<Item = (usize, u8)>,
{ {
/// 6.4.2 Identifiers /// 6.4.2 Identifiers
/// TODO: 6.4.3 Universal character names /// TODO: 6.4.3 Universal character names
fn identifier(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) { fn identifier(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
let mut ident = vec![c]; let first_span = last_span;
while let Some((span, c)) = self.src.peek() { while let Some((span, c)) = self.src.peek() {
println!("uwu {c}");
let (span, c) = (*span, *c); let (span, c) = (*span, *c);
if c.is_c_identifier() { if c.is_c_identifier() {
self.src.next(); self.src.next();
ident.push(c);
last_span = span; last_span = span;
} else { } else {
break; break;
} }
} }
(PToken::Identifier(ident), last_span) (
PToken::Identifier(&self.src_str[first_span..=last_span]),
last_span,
)
} }
/// 6.4.8 Preprocessing numbers /// 6.4.8 Preprocessing numbers
fn number(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) { fn number(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
let mut number = vec![c]; let first_span = last_span;
while let Some((span, c)) = self.src.peek() { while let Some((span, c)) = self.src.peek() {
let (span, c) = (*span, *c); let (span, c) = (*span, *c);
if c.is_ascii_digit() { if c.is_ascii_digit() {
self.src.next(); self.src.next();
number.push(c);
last_span = span; last_span = span;
} else { } else {
break; break;
} }
} }
(PToken::PpNumber(number), last_span) (
PToken::PpNumber(&self.src_str[first_span..=last_span]),
last_span,
)
} }
/// 6.4.5 String literals /// 6.4.5 String literals
fn string_literal(&mut self, mut last_span: usize) -> (PToken, usize) { fn string_literal(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
let mut string = Vec::new(); let first_span = last_span;
loop { loop {
let next = self.src.next(); let next = self.src.next();
@ -180,13 +184,15 @@ where
break; break;
} }
last_span = span; last_span = span;
string.push(c);
} }
None => return (PToken::Error, last_span), None => return (PToken::Error, last_span),
} }
} }
(PToken::StringLiteral(string), last_span) (
PToken::StringLiteral(&self.src_str[first_span + 1..=last_span]),
last_span,
)
} }
/// source peek /// source peek
@ -241,17 +247,17 @@ impl CLexExt for u8 {
} }
} }
impl<'src, I> Iterator for PLexer<I> impl<'src, I> Iterator for PLexer<'src, I>
where where
I: Iterator<Item = (usize, u8)>, I: Iterator<Item = (usize, u8)>,
{ {
type Item = (PToken, Span); type Item = (PToken<'src>, Span);
/// preprocessing-token: /// preprocessing-token:
/// header-name /// header-name TODO
/// identifier /// identifier
/// pp-number /// pp-number
/// character-constant /// character-constant TODO
/// string-literal /// string-literal
/// punctuator /// punctuator
/// each non-white-space character that cannot be one of the above /// each non-white-space character that cannot be one of the above
@ -269,11 +275,10 @@ where
match (char1, char2, char3) { match (char1, char2, char3) {
// IDENTIFIER // IDENTIFIER
(c, _, _) if c.is_c_identifier_nondigit() => { (c, _, _) if c.is_c_identifier_nondigit() => {
println!("AA"); break self.identifier(start_span);
break self.identifier(c, start_span);
} }
// NUMBER // NUMBER
(c, _, _) if c.is_c_identifier_digit() => break self.number(c, start_span), (c, _, _) if c.is_c_identifier_digit() => break self.number(start_span),
// STRING // STRING
(b'"', _, _) => break self.string_literal(start_span), (b'"', _, _) => break self.string_literal(start_span),
// WHITESPACE // WHITESPACE
@ -348,11 +353,10 @@ where
} }
} }
pub fn preprocess_tokens( pub fn preprocess_tokens(src: &str) -> impl Iterator<Item = (PToken<'_>, std::ops::Range<usize>)> {
src: impl Iterator<Item = (usize, u8)>,
) -> impl Iterator<Item = (PToken, std::ops::Range<usize>)> {
let lexer = PLexer { let lexer = PLexer {
src: src.peekmore(), src_str: src,
src: src.bytes().enumerate().peekmore(),
}; };
lexer lexer
} }
@ -361,8 +365,7 @@ pub fn preprocess_tokens(
mod tests { mod tests {
macro_rules! lex_test { macro_rules! lex_test {
($str:expr) => { ($str:expr) => {
let bytes = $str.bytes().enumerate(); let tokens = super::preprocess_tokens($str);
let tokens = super::preprocess_tokens(bytes);
let tokens = tokens.collect::<Vec<_>>(); let tokens = tokens.collect::<Vec<_>>();
insta::assert_debug_snapshot!(tokens); insta::assert_debug_snapshot!(tokens);
}; };

View file

@ -5,22 +5,13 @@ expression: tokens
[ [
( (
Identifier( Identifier(
[ "int",
105,
110,
116,
],
), ),
1..4, 1..4,
), ),
( (
Identifier( Identifier(
[ "main",
109,
97,
105,
110,
],
), ),
5..9, 5..9,
), ),
@ -44,12 +35,7 @@ expression: tokens
), ),
( (
Identifier( Identifier(
[ "puts",
112,
117,
116,
115,
],
), ),
18..22, 18..22,
), ),
@ -61,21 +47,7 @@ expression: tokens
), ),
( (
StringLiteral( StringLiteral(
[ "Hello, World!",
72,
101,
108,
108,
111,
44,
32,
87,
111,
114,
108,
100,
33,
],
), ),
23..37, 23..37,
), ),

View file

@ -5,34 +5,19 @@ expression: tokens
[ [
( (
Identifier( Identifier(
[ "AAAA",
65,
65,
65,
65,
],
), ),
0..4, 0..4,
), ),
( (
Identifier( Identifier(
[ "BBBB",
66,
66,
66,
66,
],
), ),
5..9, 5..9,
), ),
( (
Identifier( Identifier(
[ "CCCC",
67,
67,
67,
67,
],
), ),
10..14, 10..14,
), ),

View file

@ -1,7 +1,4 @@
use crate::{ use crate::pre::Punctuator;
pre::{PToken, Punctuator},
Span,
};
pub enum Token { pub enum Token {
Keyword(Keyword), Keyword(Keyword),
@ -13,9 +10,3 @@ pub enum Token {
} }
pub struct Keyword; pub struct Keyword;
fn from_pre_toks(
pre_toks: impl Iterator<Item = (PToken, Span)>,
) -> impl IntoIterator<Item = (Token, Span)> {
pre_toks.map(|token| todo!())
}

View file

@ -1,3 +1,10 @@
fn main() { use std::error::Error;
println!("Hello, world!");
fn main() -> Result<(), Box<dyn Error>> {
let input_file = std::env::args().nth(1).expect("first argument");
let src = std::fs::read_to_string(input_file)?;
parser::parse_file(src);
Ok(())
} }