From d12fa25df909e1b56c9ad4f64218a221426c4713 Mon Sep 17 00:00:00 2001
From: nils <48135649+Nilstrieb@users.noreply.github.com>
Date: Tue, 21 Jun 2022 13:46:51 +0200
Subject: [PATCH] no terrible cloning!

---
 Cargo.lock                                    |  3 +
 Cargo.toml                                    |  3 +-
 parser/src/lib.rs                             |  5 ++
 parser/src/pre/lexer.rs                       | 69 ++++++++++---------
 ...arser__pre__lexer__tests__hello_world.snap | 36 ++--------
 ...arser__pre__lexer__tests__identifiers.snap | 21 +-----
 parser/src/token.rs                           | 11 +--
 src/main.rs                                   | 11 ++-
 8 files changed, 63 insertions(+), 96 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 2483605..e6ca942 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -207,6 +207,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
 [[package]]
 name = "uwuc"
 version = "0.1.0"
+dependencies = [
+ "parser",
+]
 
 [[package]]
 name = "winapi"
diff --git a/Cargo.toml b/Cargo.toml
index e50bf1a..f6e4ae4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = [".", "parser"]
+members = [".", "./parser"]
 
 [package]
 name = "uwuc"
@@ -9,3 +9,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+parser = { path = "./parser" }
\ No newline at end of file
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index 30c1f50..7acd491 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -1,6 +1,11 @@
 #![allow(dead_code)]
+#![warn(rust_2018_idioms)]
 
 mod pre;
 mod token;
 
 pub type Span = std::ops::Range<usize>;
+
+pub fn parse_file(src: &str) {
+    println!("{src}");
+}
diff --git a/parser/src/pre/lexer.rs b/parser/src/pre/lexer.rs
index 37c89b0..9ef0a1e 100644
--- a/parser/src/pre/lexer.rs
+++ b/parser/src/pre/lexer.rs
@@ -10,12 +10,12 @@ use peekmore::PeekMore;
 use crate::Span;
 
 #[derive(Debug)]
-pub enum PToken {
-    HeaderName(Vec<u8>),
-    Identifier(Vec<u8>),
-    PpNumber(Vec<u8>),
+pub enum PToken<'src> {
+    HeaderName(&'src str),
+    Identifier(&'src str),
+    PpNumber(&'src str),
     CharConstant,
-    StringLiteral(Vec<u8>),
+    StringLiteral(&'src str),
     Punctuator(Punctuator),
     OtherNonWs(u8),
     Error,
@@ -119,58 +119,62 @@ pub enum Punctuator {
     HashHash,
 }
 
-struct PLexer<I>
+struct PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
+    src_str: &'src str,
     src: peekmore::PeekMoreIterator<I>,
 }
 
-impl<I> PLexer<I>
+impl<'src, I> PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
     /// 6.4.2 Identifiers
     /// TODO: 6.4.3 Universal character names
-    fn identifier(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
-        let mut ident = vec![c];
+    fn identifier(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
 
         while let Some((span, c)) = self.src.peek() {
-            println!("uwu {c}");
             let (span, c) = (*span, *c);
             if c.is_c_identifier() {
                 self.src.next();
-                ident.push(c);
                 last_span = span;
             } else {
                 break;
             }
         }
 
-        (PToken::Identifier(ident), last_span)
+        (
+            PToken::Identifier(&self.src_str[first_span..=last_span]),
+            last_span,
+        )
     }
 
     /// 6.4.8 Preprocessing numbers
-    fn number(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
-        let mut number = vec![c];
+    fn number(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
 
         while let Some((span, c)) = self.src.peek() {
             let (span, c) = (*span, *c);
             if c.is_ascii_digit() {
                 self.src.next();
-                number.push(c);
                 last_span = span;
             } else {
                 break;
             }
         }
 
-        (PToken::PpNumber(number), last_span)
+        (
+            PToken::PpNumber(&self.src_str[first_span..=last_span]),
+            last_span,
+        )
     }
 
     /// 6.4.5 String literals
-    fn string_literal(&mut self, mut last_span: usize) -> (PToken, usize) {
-        let mut string = Vec::new();
+    fn string_literal(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
+        let first_span = last_span;
 
         loop {
             let next = self.src.next();
@@ -180,13 +184,15 @@ where
                         break;
                     }
                     last_span = span;
-                    string.push(c);
                 }
                 None => return (PToken::Error, last_span),
             }
         }
 
-        (PToken::StringLiteral(string), last_span)
+        (
+            PToken::StringLiteral(&self.src_str[first_span + 1..=last_span]),
+            last_span,
+        )
     }
 
     /// source peek
@@ -241,17 +247,17 @@ impl CLexExt for u8 {
     }
 }
 
-impl<'src, I> Iterator for PLexer<I>
+impl<'src, I> Iterator for PLexer<'src, I>
 where
     I: Iterator<Item = (usize, u8)>,
 {
-    type Item = (PToken, Span);
+    type Item = (PToken<'src>, Span);
 
     /// preprocessing-token:
-    ///   header-name
+    ///   header-name         TODO
     ///   identifier
     ///   pp-number
-    ///   character-constant
+    ///   character-constant  TODO
     ///   string-literal
     ///   punctuator
     ///   each non-white-space character that cannot be one of the above
@@ -269,11 +275,10 @@ where
             match (char1, char2, char3) {
                 // IDENTIFIER
                 (c, _, _) if c.is_c_identifier_nondigit() => {
-                    println!("AA");
-                    break self.identifier(c, start_span);
+                    break self.identifier(start_span);
                 }
                 // NUMBER
-                (c, _, _) if c.is_c_identifier_digit() => break self.number(c, start_span),
+                (c, _, _) if c.is_c_identifier_digit() => break self.number(start_span),
                 // STRING
                 (b'"', _, _) => break self.string_literal(start_span),
                 // WHITESPACE
@@ -348,11 +353,10 @@ where
     }
 }
 
-pub fn preprocess_tokens(
-    src: impl Iterator<Item = (usize, u8)>,
-) -> impl Iterator<Item = (PToken, std::ops::Range<usize>)> {
+pub fn preprocess_tokens(src: &str) -> impl Iterator<Item = (PToken<'_>, std::ops::Range<usize>)> {
     let lexer = PLexer {
-        src: src.peekmore(),
+        src_str: src,
+        src: src.bytes().enumerate().peekmore(),
     };
     lexer
 }
@@ -361,8 +365,7 @@ pub fn preprocess_tokens(
 mod tests {
     macro_rules! lex_test {
         ($str:expr) => {
-            let bytes = $str.bytes().enumerate();
-            let tokens = super::preprocess_tokens(bytes);
+            let tokens = super::preprocess_tokens($str);
             let tokens = tokens.collect::<Vec<_>>();
             insta::assert_debug_snapshot!(tokens);
         };
diff --git a/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap b/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap
index c0a7a20..6d3b59e 100644
--- a/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap
+++ b/parser/src/pre/snapshots/parser__pre__lexer__tests__hello_world.snap
@@ -5,22 +5,13 @@ expression: tokens
 [
     (
         Identifier(
-            [
-                105,
-                110,
-                116,
-            ],
+            "int",
         ),
         1..4,
     ),
     (
         Identifier(
-            [
-                109,
-                97,
-                105,
-                110,
-            ],
+            "main",
         ),
         5..9,
     ),
@@ -44,12 +35,7 @@ expression: tokens
     ),
     (
         Identifier(
-            [
-                112,
-                117,
-                116,
-                115,
-            ],
+            "puts",
         ),
         18..22,
     ),
@@ -61,21 +47,7 @@ expression: tokens
     ),
     (
         StringLiteral(
-            [
-                72,
-                101,
-                108,
-                108,
-                111,
-                44,
-                32,
-                87,
-                111,
-                114,
-                108,
-                100,
-                33,
-            ],
+            "Hello, World!",
         ),
         23..37,
     ),
diff --git a/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap b/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap
index f0e69cd..af0ba20 100644
--- a/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap
+++ b/parser/src/pre/snapshots/parser__pre__lexer__tests__identifiers.snap
@@ -5,34 +5,19 @@ expression: tokens
 [
     (
         Identifier(
-            [
-                65,
-                65,
-                65,
-                65,
-            ],
+            "AAAA",
         ),
         0..4,
     ),
     (
         Identifier(
-            [
-                66,
-                66,
-                66,
-                66,
-            ],
+            "BBBB",
         ),
         5..9,
     ),
     (
         Identifier(
-            [
-                67,
-                67,
-                67,
-                67,
-            ],
+            "CCCC",
         ),
         10..14,
     ),
diff --git a/parser/src/token.rs b/parser/src/token.rs
index 4a383ab..3bfdda0 100644
--- a/parser/src/token.rs
+++ b/parser/src/token.rs
@@ -1,7 +1,4 @@
-use crate::{
-    pre::{PToken, Punctuator},
-    Span,
-};
+use crate::pre::Punctuator;
 
 pub enum Token {
     Keyword(Keyword),
@@ -13,9 +10,3 @@ pub enum Token {
 }
 
 pub struct Keyword;
-
-fn from_pre_toks(
-    pre_toks: impl Iterator<Item = (PToken, Span)>,
-) -> impl IntoIterator<Item = (Token, Span)> {
-    pre_toks.map(|token| todo!())
-}
diff --git a/src/main.rs b/src/main.rs
index e7a11a9..6bc697b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,10 @@
-fn main() {
-    println!("Hello, world!");
+use std::error::Error;
+
+fn main() -> Result<(), Box<dyn Error>> {
+    let input_file = std::env::args().nth(1).ok_or("expected input file path as first argument")?;
+    let src = std::fs::read_to_string(input_file)?;
+    // `parse_file` takes `&str`: lend the buffer; passing the `String` by value is a type error.
+    parser::parse_file(&src);
+
+    Ok(())
 }