no terrible cloning!

2026-03-14 21:26:05 +01:00 · 2022-06-21 13:46:51 +02:00 · 2022-06-21 13:46:51 +02:00 · d12fa25df9
commit d12fa25df9
parent 7fc10f3b6c
8 changed files with 63 additions and 96 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -207,6 +207,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
 [[package]]
 name = "uwuc"
 version = "0.1.0"
 dependencies = [
 "parser",
 ]
 [[package]]
 name = "winapi"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,5 +1,5 @@
 [workspace]
-members = [".", "parser"]
+members = [".", "./parser"]
 [package]
 name = "uwuc"
@ -9,3 +9,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
 parser = { path = "./parser" }
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@ -1,6 +1,11 @@
 #![allow(dead_code)]
 #![warn(rust_2018_idioms)]
 mod pre;
 mod token;
 pub type Span = std::ops::Range<usize>;
 pub fn parse_file(src: &str) {
    println!("{src}");
 }
--- a/parser/src/pre/lexer.rs
+++ b/parser/src/pre/lexer.rs
@ -10,12 +10,12 @@ use peekmore::PeekMore;
 use crate::Span;
 #[derive(Debug)]
-pub enum PToken {
+pub enum PToken<'src> {
-    HeaderName(Vec<u8>),
+    HeaderName(&'src str),
-    Identifier(Vec<u8>),
+    Identifier(&'src str),
-    PpNumber(Vec<u8>),
+    PpNumber(&'src str),
    CharConstant,
-    StringLiteral(Vec<u8>),
+    StringLiteral(&'src str),
    Punctuator(Punctuator),
    OtherNonWs(u8),
    Error,
@ -119,58 +119,62 @@ pub enum Punctuator {
    HashHash,
 }
-struct PLexer<I>
+struct PLexer<'src, I>
 where
    I: Iterator<Item = (usize, u8)>,
 {
    src_str: &'src str,
    src: peekmore::PeekMoreIterator<I>,
 }
-impl<I> PLexer<I>
+impl<'src, I> PLexer<'src, I>
 where
    I: Iterator<Item = (usize, u8)>,
 {
    /// 6.4.2 Identifiers
    /// TODO: 6.4.3 Universal character names
-    fn identifier(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
+    fn identifier(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
-        let mut ident = vec![c];
+        let first_span = last_span;
        while let Some((span, c)) = self.src.peek() {
            println!("uwu {c}");
            let (span, c) = (*span, *c);
            if c.is_c_identifier() {
                self.src.next();
                ident.push(c);
                last_span = span;
            } else {
                break;
            }
        }
-        (PToken::Identifier(ident), last_span)
+        (
            PToken::Identifier(&self.src_str[first_span..=last_span]),
            last_span,
        )
    }
    /// 6.4.8 Preprocessing numbers
-    fn number(&mut self, c: u8, mut last_span: usize) -> (PToken, usize) {
+    fn number(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
-        let mut number = vec![c];
+        let first_span = last_span;
        while let Some((span, c)) = self.src.peek() {
            let (span, c) = (*span, *c);
            if c.is_ascii_digit() {
                self.src.next();
                number.push(c);
                last_span = span;
            } else {
                break;
            }
        }
-        (PToken::PpNumber(number), last_span)
+        (
            PToken::PpNumber(&self.src_str[first_span..=last_span]),
            last_span,
        )
    }
    /// 6.4.5 String literals
-    fn string_literal(&mut self, mut last_span: usize) -> (PToken, usize) {
+    fn string_literal(&mut self, mut last_span: usize) -> (PToken<'src>, usize) {
-        let mut string = Vec::new();
+        let first_span = last_span;
        loop {
            let next = self.src.next();
@ -180,13 +184,15 @@ where
                        break;
                    }
                    last_span = span;
                    string.push(c);
                }
                None => return (PToken::Error, last_span),
            }
        }
-        (PToken::StringLiteral(string), last_span)
+        (
            PToken::StringLiteral(&self.src_str[first_span + 1..=last_span]),
            last_span,
        )
    }
    /// source peek
@ -241,17 +247,17 @@ impl CLexExt for u8 {
    }
 }
-impl<'src, I> Iterator for PLexer<I>
+impl<'src, I> Iterator for PLexer<'src, I>
 where
    I: Iterator<Item = (usize, u8)>,
 {
-    type Item = (PToken, Span);
+    type Item = (PToken<'src>, Span);
    /// preprocessing-token:
-    ///   header-name
+    ///   header-name         TODO
    ///   identifier
    ///   pp-number
-    ///   character-constant
+    ///   character-constant  TODO
    ///   string-literal
    ///   punctuator
    ///   each non-white-space character that cannot be one of the above
@ -269,11 +275,10 @@ where
            match (char1, char2, char3) {
                // IDENTIFIER
                (c, _, _) if c.is_c_identifier_nondigit() => {
-                    println!("AA");
+                    break self.identifier(start_span);
                    break self.identifier(c, start_span);
                }
                // NUMBER
-                (c, _, _) if c.is_c_identifier_digit() => break self.number(c, start_span),
+                (c, _, _) if c.is_c_identifier_digit() => break self.number(start_span),
                // STRING
                (b'"', _, _) => break self.string_literal(start_span),
                // WHITESPACE
@ -348,11 +353,10 @@ where
    }
 }
-pub fn preprocess_tokens(
+pub fn preprocess_tokens(src: &str) -> impl Iterator<Item = (PToken<'_>, std::ops::Range<usize>)> {
    src: impl Iterator<Item = (usize, u8)>,
 ) -> impl Iterator<Item = (PToken, std::ops::Range<usize>)> {
    let lexer = PLexer {
-        src: src.peekmore(),
+        src_str: src,
        src: src.bytes().enumerate().peekmore(),
    };
    lexer
 }
@ -361,8 +365,7 @@ pub fn preprocess_tokens(
 mod tests {
    macro_rules! lex_test {
        ($str:expr) => {
-            let bytes = $str.bytes().enumerate();
+            let tokens = super::preprocess_tokens($str);
            let tokens = super::preprocess_tokens(bytes);
            let tokens = tokens.collect::<Vec<_>>();
            insta::assert_debug_snapshot!(tokens);
        };
--- a/parser/src/pre/snapshots/parserprelexertestshello_world.snap
+++ b/parser/src/pre/snapshots/parserprelexertestshello_world.snap
@ -5,22 +5,13 @@ expression: tokens
 [
    (
        Identifier(
-            [
+            "int",
                105,
                110,
                116,
            ],
        ),
        1..4,
    ),
    (
        Identifier(
-            [
+            "main",
                109,
                97,
                105,
                110,
            ],
        ),
        5..9,
    ),
@ -44,12 +35,7 @@ expression: tokens
    ),
    (
        Identifier(
-            [
+            "puts",
                112,
                117,
                116,
                115,
            ],
        ),
        18..22,
    ),
@ -61,21 +47,7 @@ expression: tokens
    ),
    (
        StringLiteral(
-            [
+            "Hello, World!",
                72,
                101,
                108,
                108,
                111,
                44,
                32,
                87,
                111,
                114,
                108,
                100,
                33,
            ],
        ),
        23..37,
    ),
--- a/parser/src/pre/snapshots/parserprelexertestsidentifiers.snap
+++ b/parser/src/pre/snapshots/parserprelexertestsidentifiers.snap
@ -5,34 +5,19 @@ expression: tokens
 [
    (
        Identifier(
-            [
+            "AAAA",
                65,
                65,
                65,
                65,
            ],
        ),
        0..4,
    ),
    (
        Identifier(
-            [
+            "BBBB",
                66,
                66,
                66,
                66,
            ],
        ),
        5..9,
    ),
    (
        Identifier(
-            [
+            "CCCC",
                67,
                67,
                67,
                67,
            ],
        ),
        10..14,
    ),
--- a/parser/src/token.rs
+++ b/parser/src/token.rs
@ -1,7 +1,4 @@
-use crate::{
+use crate::pre::Punctuator;
    pre::{PToken, Punctuator},
    Span,
 };
 pub enum Token {
    Keyword(Keyword),
@ -13,9 +10,3 @@ pub enum Token {
 }
 pub struct Keyword;
 fn from_pre_toks(
    pre_toks: impl Iterator<Item = (PToken, Span)>,
 ) -> impl IntoIterator<Item = (Token, Span)> {
    pre_toks.map(|token| todo!())
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -1,3 +1,10 @@
-fn main() {
+use std::error::Error;
-    println!("Hello, world!");
+
 fn main() -> Result<(), Box<dyn Error>> {
    let input_file = std::env::args().nth(1).expect("first argument");
    let src = std::fs::read_to_string(input_file)?;
    parser::parse_file(src);
    Ok(())
 }