From 0f7999cc0f4efb1b2c84dc544ded9d8315f5aa2b Mon Sep 17 00:00:00 2001
From: Nilstrieb
Date: Sun, 3 Oct 2021 21:22:40 +0200
Subject: [PATCH] started lexer

---
 README.md    | 142 +++++++++++++++++++++++++++++++++++++++++
 src/lex.rs   | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs   |  10 +++
 src/parse.rs |   0
 4 files changed, 326 insertions(+)
 create mode 100644 README.md
 create mode 100644 src/lex.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/parse.rs

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..86094b7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,142 @@
+_ is a small embeddable scripting language
+
+_ is inspired by Javascript, Lox, Lua, Python, Rust and more
+
+# Reference
+
+## Overview
+
+Declaring variables using `let`
+
+```
+let hello = 4
+```
+
+Semicolons are not needed
+
+```
+let test = 5
+let another = 4
+```
+
+The language has strings, numbers, arrays, objects, booleans and null
+
+```
+let string = "hallo"
+let number = 4
+let array = []
+let object = {}
+let _null = null
+let bool = true
+```
+
+You access properties on objects using `.`
+
+```
+let obj = {}
+obj.hi = "hi!"
+```
+
+Functions are first class
+
+```
+let obj = {}
+obj.hello = helloFn
+obj.hello()
+```
+
+Functions are declared using `fn`
+
+```
+fn greet(name) {
+    return "hello, " + name
+}
+```
+
+Comments using `#`
+```
+# hi!
+```
+
+There are many native functions that can easily be customized and added/removed by the host
+
+```
+# rocket game
+turnRocketLeft(29)
+turnRocketRight(32)
+
+# chat bot
+message.respond("hi")
+
+# dangerous http requests
+fn callback(html) {
+    print(html)
+}
+fetch("https://github.com/Nilstrieb", callback);
+```
+
+Basic arithmetic and boolean logic are available
+
+```
+let a = 5
+let b = 5
+print(a + b / b * b - a % b)
+print(true and false or false or true and false)
+```
+
+Loops and conditionals
+
+```
+let x = true
+if x {
+    print("true!")
+} else {
+    print("false :(")
+}
+
+loop {
+    while 1 > 5 {
+        print("yeet")
+        break
+    }
+    # no for loops for now, but will be added (probably like python)
+}
+```
+
+_ is dynamically and *strongly* typed
+
+## Detail
+
+### Reserved Keywords
+
+#### Statements
+`fn`
+`let`
+`if`
+`else`
+`loop`
+`while`
+`for`
+
+#### Values
+`true`
+`false`
+`null`
+
+#### Operators
+`not`
+`and`
+`or`
+
+### Operators
+`==`
+`>=`
+`>`
+`<=`
+`<`
+`!=`
+`+`
+`-`
+`*`
+`/`
+`%`
\ No newline at end of file
diff --git a/src/lex.rs b/src/lex.rs
new file mode 100644
index 0000000..709b567
--- /dev/null
+++ b/src/lex.rs
@@ -0,0 +1,174 @@
+//! Lexer for the scripting language described in the README.
+//!
+//! Work in progress: only whitespace and the `!=` operator are recognised so far;
+//! any other input is reported as a [`LexError`].
+
+// The lexer is not wired into a parser yet, so most items are still unused.
+#![allow(dead_code)]
+
+use std::iter::Peekable;
+use std::str::CharIndices;
+
+/// Byte offset into the source code at which a token starts.
+#[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Ord, Eq, Hash)]
+struct Span(usize);
+
+/// A single lexed token together with its position in the source.
+#[derive(Debug, Clone)]
+pub struct Token<'code> {
+    span: Span,
+    kind: TokenType<'code>,
+}
+
+/// Every kind of token the language knows about.
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenType<'code> {
+    // keywords
+    Let,
+    Fn,
+    If,
+    Else,
+    Loop,
+    While,
+    For,
+    True,
+    False,
+    Null,
+    And,
+    Or,
+    Not,
+    // literals
+    String(&'code str),
+    Number(f64),
+    // ident
+    Ident(&'code str),
+    // punctuation
+    /// +
+    Plus,
+    /// -
+    Minus,
+    /// *
+    Asterisk,
+    /// /
+    Slash,
+    /// %
+    Percent,
+    /// {
+    BraceO,
+    /// }
+    BraceC,
+    /// [
+    BracketO,
+    /// ]
+    BracketC,
+    /// (
+    ParenO,
+    /// )
+    ParenC,
+    /// .
+    Dot,
+    /// ,
+    Comma,
+    /// =
+    Equal,
+    /// ==
+    EqualEqual,
+    /// !=
+    BangEqual,
+    /// >
+    GreaterThan,
+    /// <
+    LessThan,
+    /// >=
+    GreaterThanEqual,
+    /// <=
+    LessThanEqual,
+}
+
+/// Iterator that turns source code into [`Token`]s.
+#[derive(Debug, Clone)]
+pub struct Lexer<'code> {
+    code: Peekable<CharIndices<'code>>,
+    state: LexState,
+}
+
+/// What the lexer is in the middle of recognising; the payload is the byte
+/// offset at which the pending token started.
+#[derive(Debug, Clone, PartialEq, Eq)]
+enum LexState {
+    Init,
+    StrLit(usize),
+    NumLit(usize),
+    Ident(usize),
+    Equal(usize),
+    Bang(usize),
+    GreaterThan(usize),
+    LessThan(usize),
+}
+
+impl<'code> Lexer<'code> {
+    /// Creates a lexer over `code`, positioned at its first character.
+    pub fn lex(code: &'code str) -> Self {
+        Self {
+            code: code.char_indices().peekable(),
+            state: LexState::Init,
+        }
+    }
+
+    /// Returns whether the next character is `expected`, without consuming it.
+    fn expect(&mut self, expected: char) -> bool {
+        matches!(self.code.peek(), Some(&(_, next)) if next == expected)
+    }
+}
+
+impl<'code> Iterator for Lexer<'code> {
+    type Item = Result<Token<'code>, LexError>;
+
+    /// Produces the next token, `None` at the end of the input, or an error
+    /// for input the lexer cannot handle yet.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match self.state {
+                LexState::Init => match self.code.next() {
+                    // End of input: the iterator is exhausted.
+                    None => return None,
+                    // Newlines are treated like any other whitespace for now.
+                    Some((_, c)) if c.is_whitespace() => {}
+                    Some((start, '!')) => self.state = LexState::Bang(start),
+                    // Anything else is not supported yet; fail loudly instead of
+                    // spinning forever or silently dropping input.
+                    Some(_) => return Some(Err(LexError)),
+                },
+                LexState::Bang(start) => {
+                    // Always go back to `Init`, otherwise the next call would
+                    // resume in the middle of an already finished token.
+                    self.state = LexState::Init;
+                    return if self.expect('=') {
+                        let _ = self.code.next();
+                        Some(Ok(Token {
+                            span: Span(start),
+                            kind: TokenType::BangEqual,
+                        }))
+                    } else {
+                        Some(Err(LexError))
+                    };
+                }
+                // No transition enters these states yet. Reset and report an
+                // error rather than looping without making progress.
+                LexState::StrLit(_)
+                | LexState::NumLit(_)
+                | LexState::Ident(_)
+                | LexState::Equal(_)
+                | LexState::GreaterThan(_)
+                | LexState::LessThan(_) => {
+                    self.state = LexState::Init;
+                    return Some(Err(LexError));
+                }
+            }
+        }
+    }
+}
+
+/// Error produced when the lexer encounters input it cannot tokenize.
+#[derive(Debug)]
+pub struct LexError;
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..81f4f87
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,10 @@
+mod lex;
+mod parse;
+
+/// Lexes `program` and prints the resulting tokens, or the first lex error,
+/// to stdout.
+pub fn run_program(program: &str) {
+    let lexer = lex::Lexer::lex(program);
+    let tokens: Result<Vec<_>, _> = lexer.collect();
+    println!("{:#?}", tokens);
+}
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..e69de29