From d12fa25df909e1b56c9ad4f64218a221426c4713 Mon Sep 17 00:00:00 2001 From: nils <48135649+Nilstrieb@users.noreply.github.com> Date: Tue, 21 Jun 2022 13:46:51 +0200 Subject: [PATCH] no terrible cloning! --- Cargo.lock | 3 + Cargo.toml | 3 +- parser/src/lib.rs | 5 ++ parser/src/pre/lexer.rs | 69 ++++++++++--------- ...arser__pre__lexer__tests__hello_world.snap | 36 ++-------- ...arser__pre__lexer__tests__identifiers.snap | 21 +----- parser/src/token.rs | 11 +-- src/main.rs | 11 ++- 8 files changed, 63 insertions(+), 96 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2483605..e6ca942 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,6 +207,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" [[package]] name = "uwuc" version = "0.1.0" +dependencies = [ + "parser", +] [[package]] name = "winapi" diff --git a/Cargo.toml b/Cargo.toml index e50bf1a..f6e4ae4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [".", "parser"] +members = [".", "./parser"] [package] name = "uwuc" @@ -9,3 +9,4 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +parser = { path = "./parser" } \ No newline at end of file diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 30c1f50..7acd491 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,6 +1,11 @@ #![allow(dead_code)] +#![warn(rust_2018_idioms)] mod pre; mod token; pub type Span = std::ops::Range; + +pub fn parse_file(src: &str) { + println!("{src}"); +} diff --git a/parser/src/pre/lexer.rs b/parser/src/pre/lexer.rs index 37c89b0..9ef0a1e 100644 --- a/parser/src/pre/lexer.rs +++ b/parser/src/pre/lexer.rs @@ -10,12 +10,12 @@ use peekmore::PeekMore; use crate::Span; #[derive(Debug)] -pub enum PToken { - HeaderName(Vec), - Identifier(Vec), - PpNumber(Vec), +pub enum PToken<'src> { + HeaderName(&'src str), + Identifier(&'src str), + PpNumber(&'src str), CharConstant, - StringLiteral(Vec), + StringLiteral(&'src str), Punctuator(Punctuator), OtherNonWs(u8), Error, @@ -119,58 +119,62 @@ pub enum Punctuator { HashHash, } -struct PLexer +struct PLexer<'src, I> where I: Iterator, { + src_str: &'src str, src: peekmore::PeekMoreIterator, } -impl PLexer +impl<'src, I> PLexer<'src, I> where I: Iterator, { /// 6.4.2 Identifiers /// TODO: 6.4.3 Universal character names - fn identifier(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) { - let mut ident = vec![c]; + fn identifier(&mut self, mut last_span: usize) -> (PToken<'src>, usize) { + let first_span = last_span; while let Some((span, c)) = self.src.peek() { - println!("uwu {c}"); let (span, c) = (*span, *c); if c.is_c_identifier() { self.src.next(); - ident.push(c); last_span = span; } else { break; } } - (PToken::Identifier(ident), last_span) + ( + PToken::Identifier(&self.src_str[first_span..=last_span]), + last_span, + ) } /// 6.4.8 Preprocessing numbers - fn number(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) { - let mut number = vec![c]; + fn number(&mut self, mut last_span: usize) -> (PToken<'src>, usize) { + let first_span = last_span; while let Some((span, c)) = self.src.peek() { let (span, c) = (*span, *c); if c.is_ascii_digit() { self.src.next(); - number.push(c); last_span = span; } else { break; } } - (PToken::PpNumber(number), last_span) + ( + PToken::PpNumber(&self.src_str[first_span..=last_span]), + last_span, + ) } /// 6.4.5 String literals - fn string_literal(&mut self, mut last_span: usize) -> (PToken, usize) { - let mut string = Vec::new(); + fn string_literal(&mut self, mut last_span: usize) -> (PToken<'src>, usize) { + let first_span = last_span; loop { let next = self.src.next(); @@ -180,13 +184,15 @@ where break; } last_span = span; - string.push(c); } None => return (PToken::Error, last_span), } } - (PToken::StringLiteral(string), last_span) + ( + PToken::StringLiteral(&self.src_str[first_span + 1..=last_span]), + last_span, + ) } /// source peek @@ -241,17 +247,17 @@ impl CLexExt for u8 { } } -impl<'src, I> Iterator for PLexer +impl<'src, I> Iterator for PLexer<'src, I> where I: Iterator, { - type Item = (PToken, Span); + type Item = (PToken<'src>, Span); /// preprocessing-token: - /// header-name + /// header-name TODO /// identifier /// pp-number - /// character-constant + /// character-constant TODO /// string-literal /// punctuator /// each non-white-space character that cannot be one of the above @@ -269,11 +275,10 @@ where match (char1, char2, char3) { // IDENTIFIER (c, _, _) if c.is_c_identifier_nondigit() => { - println!("AA"); - break self.identifier(c, start_span); + break self.identifier(start_span); } // NUMBER - (c, _, _) if c.is_c_identifier_digit() => break self.number(c, start_span), + (c, _, _) if c.is_c_identifier_digit() => break self.number(start_span), // STRING (b'"', _, _) => break self.string_literal(start_span), // WHITESPACE @@ -348,11 +353,10 @@ where } } -pub fn preprocess_tokens( - src: impl Iterator, -) -> impl Iterator)> { +pub fn preprocess_tokens(src: &str) -> impl Iterator, std::ops::Range)> { let lexer = PLexer { - src: src.peekmore(), + src_str: src, + src: src.bytes().enumerate().peekmore(), }; lexer } @@ -361,8 +365,7 @@ pub fn preprocess_tokens( mod tests { macro_rules! lex_test { ($str:expr) => { - let bytes = $str.bytes().enumerate(); - let tokens = super::preprocess_tokens(bytes); + let tokens = super::preprocess_tokens($str); let tokens = tokens.collect::>(); insta::assert_debug_snapshot!(tokens); }; diff --git a/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap b/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap index c0a7a20..6d3b59e 100644 --- a/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap +++ b/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap @@ -5,22 +5,13 @@ expression: tokens [ ( Identifier( - [ - 105, - 110, - 116, - ], + "int", ), 1..4, ), ( Identifier( - [ - 109, - 97, - 105, - 110, - ], + "main", ), 5..9, ), @@ -44,12 +35,7 @@ expression: tokens ), ( Identifier( - [ - 112, - 117, - 116, - 115, - ], + "puts", ), 18..22, ), @@ -61,21 +47,7 @@ expression: tokens ), ( StringLiteral( - [ - 72, - 101, - 108, - 108, - 111, - 44, - 32, - 87, - 111, - 114, - 108, - 100, - 33, - ], + "Hello, World!", ), 23..37, ), diff --git a/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap b/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap index f0e69cd..af0ba20 100644 --- a/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap +++ b/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap @@ -5,34 +5,19 @@ expression: tokens [ ( Identifier( - [ - 65, - 65, - 65, - 65, - ], + "AAAA", ), 0..4, ), ( Identifier( - [ - 66, - 66, - 66, - 66, - ], + "BBBB", ), 5..9, ), ( Identifier( - [ - 67, - 67, - 67, - 67, - ], + "CCCC", ), 10..14, ), diff --git a/parser/src/token.rs b/parser/src/token.rs index 4a383ab..3bfdda0 100644 --- a/parser/src/token.rs +++ b/parser/src/token.rs @@ -1,7 +1,4 @@ -use crate::{ - pre::{PToken, Punctuator}, - Span, -}; +use crate::pre::Punctuator; pub enum Token { Keyword(Keyword), @@ -13,9 +10,3 @@ pub enum Token { } pub struct Keyword; - -fn from_pre_toks( - pre_toks: impl Iterator, -) -> impl IntoIterator { - pre_toks.map(|token| todo!()) -} diff --git a/src/main.rs b/src/main.rs index e7a11a9..6bc697b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,10 @@ -fn main() { - println!("Hello, world!"); +use std::error::Error; + +fn main() -> Result<(), Box> { + let input_file = std::env::args().nth(1).expect("first argument"); + let src = std::fs::read_to_string(input_file)?; + + parser::parse_file(src); + + Ok(()) }