no terrible cloning!

nora 2022-06-21 13:46:51 +02:00
parent 7fc10f3b6c
commit d12fa25df9
8 changed files with 63 additions and 96 deletions

Cargo.lock generated
View file

@@ -207,6 +207,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
 [[package]]
 name = "uwuc"
 version = "0.1.0"
+dependencies = [
+ "parser",
+]
 [[package]]
 name = "winapi"

View file

@@ -1,5 +1,5 @@
 [workspace]
-members = [".", "parser"]
+members = [".", "./parser"]
 [package]
 name = "uwuc"
@@ -9,3 +9,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
+parser = { path = "./parser" }

View file

@@ -1,6 +1,11 @@
 #![allow(dead_code)]
 #![warn(rust_2018_idioms)]
 mod pre;
 mod token;
 pub type Span = std::ops::Range<usize>;
+pub fn parse_file(src: &str) {
+    println!("{src}");
+}

View file

@@ -10,12 +10,12 @@ use peekmore::PeekMore;
 use crate::Span;
 #[derive(Debug)]
-pub enum PToken {
-    HeaderName(Vec<u8>),
-    Identifier(Vec<u8>),
-    PpNumber(Vec<u8>),
+pub enum PToken<'src> {
+    HeaderName(&'src str),
+    Identifier(&'src str),
+    PpNumber(&'src str),
     CharConstant,
-    StringLiteral(Vec<u8>),
+    StringLiteral(&'src str),
     Punctuator(Punctuator),
     OtherNonWs(u8),
     Error,
@@ -119,58 +119,62 @@ pub enum Punctuator {
     HashHash,
 }
-struct PLexer<I>
+struct PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
+    src_str: &'src str,
     src: peekmore::PeekMoreIterator<I>,
 }
-impl<I> PLexer<I>
+impl<'src, I> PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
     /// 6.4.2 Identifiers
     /// TODO: 6.4.3 Universal character names
-    fn identifier(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
-        let mut ident = vec![c];
+    fn identifier(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
         while let Some((span, c)) = self.src.peek() {
-            println!("uwu {c}");
             let (span, c) = (*span, *c);
             if c.is_c_identifier() {
                 self.src.next();
-                ident.push(c);
                 last_span = span;
             } else {
                 break;
             }
         }
-        (PToken::Identifier(ident), last_span)
+        (
+            PToken::Identifier(&self.src_str[first_span..=last_span]),
+            last_span,
+        )
     }
     /// 6.4.8 Preprocessing numbers
-    fn number(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
-        let mut number = vec![c];
+    fn number(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
         while let Some((span, c)) = self.src.peek() {
             let (span, c) = (*span, *c);
             if c.is_ascii_digit() {
                 self.src.next();
-                number.push(c);
                 last_span = span;
             } else {
                 break;
             }
         }
-        (PToken::PpNumber(number), last_span)
+        (
+            PToken::PpNumber(&self.src_str[first_span..=last_span]),
+            last_span,
+        )
     }
     /// 6.4.5 String literals
-    fn string_literal(&mut self, mut last_span: usize) -> (PToken, usize) {
-        let mut string = Vec::new();
+    fn string_literal(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
         loop {
             let next = self.src.next();
@@ -180,13 +184,15 @@ where
                         break;
                     }
                     last_span = span;
-                    string.push(c);
                 }
                 None => return (PToken::Error, last_span),
             }
         }
-        (PToken::StringLiteral(string), last_span)
+        (
+            PToken::StringLiteral(&self.src_str[first_span + 1..=last_span]),
+            last_span,
+        )
     }
/// source peek
@@ -241,17 +247,17 @@ impl CLexExt for u8 {
     }
 }
-impl<'src, I> Iterator for PLexer<I>
+impl<'src, I> Iterator for PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
-    type Item = (PToken, Span);
+    type Item = (PToken<'src>, Span);
     /// preprocessing-token:
-    /// header-name
+    /// header-name TODO
     /// identifier
     /// pp-number
-    /// character-constant
+    /// character-constant TODO
     /// string-literal
     /// punctuator
     /// each non-white-space character that cannot be one of the above
@@ -269,11 +275,10 @@ where
             match (char1, char2, char3) {
                 // IDENTIFIER
                 (c, _, _) if c.is_c_identifier_nondigit() => {
-                    println!("AA");
-                    break self.identifier(c, start_span);
+                    break self.identifier(start_span);
                 }
                 // NUMBER
-                (c, _, _) if c.is_c_identifier_digit() => break self.number(c, start_span),
+                (c, _, _) if c.is_c_identifier_digit() => break self.number(start_span),
                 // STRING
                 (b'"', _, _) => break self.string_literal(start_span),
                 // WHITESPACE
@@ -348,11 +353,10 @@ where
     }
 }
-pub fn preprocess_tokens(
-    src: impl Iterator<Item = (usize, u8)>,
-) -> impl Iterator<Item = (PToken, std::ops::Range<usize>)> {
+pub fn preprocess_tokens(src: &str) -> impl Iterator<Item = (PToken<'_>, std::ops::Range<usize>)> {
     let lexer = PLexer {
-        src: src.peekmore(),
+        src_str: src,
+        src: src.bytes().enumerate().peekmore(),
     };
     lexer
 }
@@ -361,8 +365,7 @@ pub fn preprocess_tokens(
 mod tests {
     macro_rules! lex_test {
         ($str:expr) => {
-            let bytes = $str.bytes().enumerate();
-            let tokens = super::preprocess_tokens(bytes);
+            let tokens = super::preprocess_tokens($str);
             let tokens = tokens.collect::<Vec<_>>();
             insta::assert_debug_snapshot!(tokens);
         };
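
For reference, a minimal, self-contained sketch of the zero-copy pattern the lexer above switches to: rather than pushing bytes into a Vec<u8>, it remembers byte offsets into the source and hands out borrowed &str slices. The names Tok and lex_identifier and the simplified identifier rule here are illustrative only, not part of the uwuc crates.

/// Token payloads borrow from the source instead of owning a copy of it.
#[derive(Debug, PartialEq)]
enum Tok<'src> {
    Ident(&'src str),
}

/// Lex one ASCII identifier starting at byte offset `start`, returning the
/// borrowed slice and the offset just past its last byte.
fn lex_identifier(src: &str, start: usize) -> (Tok<'_>, usize) {
    let mut end = start;
    for (i, b) in src.bytes().enumerate().skip(start) {
        if b.is_ascii_alphanumeric() || b == b'_' {
            end = i + 1; // consume this byte
        } else {
            break;
        }
    }
    // No allocation: the token is just a view into `src`.
    (Tok::Ident(&src[start..end]), end)
}

fn main() {
    let src = "int main";
    let (tok, next) = lex_identifier(src, 0);
    assert_eq!(tok, Tok::Ident("int"));
    assert_eq!(next, 3);
    let (tok, _) = lex_identifier(src, 4);
    assert_eq!(tok, Tok::Ident("main"));
}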

View file

@@ -5,22 +5,13 @@ expression: tokens
 [
     (
         Identifier(
-            [
-                105,
-                110,
-                116,
-            ],
+            "int",
         ),
         1..4,
     ),
     (
         Identifier(
-            [
-                109,
-                97,
-                105,
-                110,
-            ],
+            "main",
         ),
         5..9,
     ),
@@ -44,12 +35,7 @@ expression: tokens
     ),
     (
         Identifier(
-            [
-                112,
-                117,
-                116,
-                115,
-            ],
+            "puts",
         ),
         18..22,
     ),
@@ -61,21 +47,7 @@ expression: tokens
     ),
     (
         StringLiteral(
-            [
-                72,
-                101,
-                108,
-                108,
-                111,
-                44,
-                32,
-                87,
-                111,
-                114,
-                108,
-                100,
-                33,
-            ],
+            "Hello, World!",
         ),
         23..37,
     ),

View file

@@ -5,34 +5,19 @@ expression: tokens
 [
     (
         Identifier(
-            [
-                65,
-                65,
-                65,
-                65,
-            ],
+            "AAAA",
         ),
         0..4,
     ),
     (
         Identifier(
-            [
-                66,
-                66,
-                66,
-                66,
-            ],
+            "BBBB",
         ),
         5..9,
     ),
     (
         Identifier(
-            [
-                67,
-                67,
-                67,
-                67,
-            ],
+            "CCCC",
        ),
         10..14,
     ),

View file

@@ -1,7 +1,4 @@
-use crate::{
-    pre::{PToken, Punctuator},
-    Span,
-};
+use crate::pre::Punctuator;
 pub enum Token {
     Keyword(Keyword),
@@ -13,9 +10,3 @@ pub enum Token {
 }
 pub struct Keyword;
-fn from_pre_toks(
-    pre_toks: impl Iterator<Item = (PToken, Span)>,
-) -> impl IntoIterator<Item = (Token, Span)> {
-    pre_toks.map(|token| todo!())
-}

View file

@@ -1,3 +1,10 @@
-fn main() {
-    println!("Hello, world!");
+use std::error::Error;
+fn main() -> Result<(), Box<dyn Error>> {
+    let input_file = std::env::args().nth(1).expect("first argument");
+    let src = std::fs::read_to_string(input_file)?;
+    parser::parse_file(&src);
+    Ok(())
 }
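
With the new preprocess_tokens(src: &str) signature, a caller only needs the source text; every span is a byte range back into it. Below is a rough sketch of how that iterator could be driven, for example from parse_file in the parser crate. This is an illustration under that assumption: the committed parse_file still just prints the source, and mod pre is private, so the call has to stay inside the crate, exactly as the lex_test! macro does.

// Hypothetical helper inside the parser crate; not part of this commit.
fn dump_pre_tokens(src: &str) {
    for (token, span) in crate::pre::preprocess_tokens(src) {
        // `span` is a byte range into `src`, so the lexed text can be shown
        // without the token owning a copy of it.
        println!("{token:?} at {span:?}: {:?}", &src[span.clone()]);
    }
}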