From c6765d7da6b63f4b839b2af402ecb235c8224a38 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Wed, 29 Dec 2021 17:00:30 +0100 Subject: [PATCH] start bytecode compilation --- Cargo.lock | 9 ++ Cargo.toml | 5 + README.md | 22 ++--- clippy.toml | 2 + src/ast.rs | 8 +- src/bird/mem.rs | 13 --- src/bird/mod.rs | 10 -- src/bytecode.rs | 45 +++++++++ src/compile.rs | 238 ++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 19 +++- src/parse/mod.rs | 4 +- src/parse/test.rs | 2 +- src/value.rs | 9 ++ test.sl | 2 +- 14 files changed, 342 insertions(+), 46 deletions(-) create mode 100644 clippy.toml delete mode 100644 src/bird/mem.rs delete mode 100644 src/bird/mod.rs create mode 100644 src/bytecode.rs create mode 100644 src/compile.rs create mode 100644 src/value.rs diff --git a/Cargo.lock b/Cargo.lock index 321806b..017fa33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "script_lang" version = "0.1.0" +dependencies = [ + "rustc-hash", +] diff --git a/Cargo.toml b/Cargo.toml index 3420f9d..88c10e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +rustc-hash = { version = "1.1.0", optional = true } + + +[features] +fxhash = ["rustc-hash"] diff --git a/README.md b/README.md index ff9319f..d13cb1e 100644 --- a/README.md +++ b/README.md @@ -10,22 +10,22 @@ language_name is inspired by Javascript, Lox, Lua, Python, Rust and more Declaring variables using `let` -``` +```rust let hello = 4; ``` Semicolons are needed :) -``` +```rust let test = 5; let another = 4; ``` The language has strings, numbers, arrays, objects and null and booleans -``` +```rust let string = "hallo"; -let number = 4; +let number = 4; let array = []; let object = {}; let _null = null; @@ -34,14 +34,14 @@ let bool = true; You access properties on objects using `.` -``` +```rust let obj = {}; obj.hi = "hi!"; ``` Functions are first class -``` +```rust let obj = {}; obj.hello = helloFn; obj.hello(); @@ -49,7 +49,7 @@ obj.hello(); Functions are declared using `fn` -``` +```rust fn greet(name) { return "hello, " + name; } @@ -58,7 +58,7 @@ fn greet(name) { Functions are closures Comments using `#` -``` +```py # hi! ``` @@ -72,7 +72,7 @@ comment There are many native functions, that can easily be customized and added/removed by the host -``` +```rust # rocket game turnRocketLeft(29); turnRocketRight(32); @@ -89,7 +89,7 @@ fetch("https://github.com/Nilstrieb", callback); Basic arithmetic and boolean logic is available -``` +```rust let a = 5; let b = 5; print(a + b / b * b - a % b); @@ -98,7 +98,7 @@ print(true and false or false or true and false); Loops and conditionals -``` +```rust let x = true; if x { print("true!"); diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..faa64c0 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,2 @@ +# we want to use our custom type from `values.rs`, so that consumers can choose between which HashMap they want +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] diff --git a/src/ast.rs b/src/ast.rs index 6d9ecbe..5eb8192 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,13 +2,11 @@ //! The AST module contains all structs and enums for the abstract syntax tree generated by the parser use crate::errors::Span; +use crate::value::Symbol; -/// imagine interning or something here -pub type Symbol = String; - -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Ident { - pub name: Symbol, + pub sym: Symbol, pub span: Span, } diff --git a/src/bird/mem.rs b/src/bird/mem.rs deleted file mode 100644 index 0a33ae6..0000000 --- a/src/bird/mem.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::collections::HashMap; -use std::rc::Rc; - -#[derive(Debug, PartialEq, Clone)] -enum Value { - Null, - Bool(bool), - Number(f64), - String(Rc), - Object(Rc>), - Array(Rc>), - Fn(Rc<()>), -} diff --git a/src/bird/mod.rs b/src/bird/mod.rs deleted file mode 100644 index c0d41a5..0000000 --- a/src/bird/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -#![allow(dead_code)] - -mod mem; - -use crate::ast::Program; - -#[derive(Debug)] -struct Vm {} - -fn execute(_program: Program) {} diff --git a/src/bytecode.rs b/src/bytecode.rs new file mode 100644 index 0000000..ce67516 --- /dev/null +++ b/src/bytecode.rs @@ -0,0 +1,45 @@ +use crate::value::{HashMap, Symbol}; +use std::rc::Rc; + +#[derive(Debug, Default)] +pub struct FnBlock { + pub code: Vec, + pub stack_sizes: Vec, + pub arity: u8, +} + +// todo: this should be copy in the end tbh +#[derive(Debug)] +pub enum Instr { + /// Store the current value on the stack to the stack location with the local offset `usize` + Store(usize), + /// Load the variable value from the local offset `usize` onto the stack + Load(usize), + /// Push a value onto the stack + PushVal(Box), + /// Negate the top value on the stack. Only works with numbers and booleans + Neg, + BinAdd, + BinSub, + BinMul, + BinDiv, + BinMod, + BinAnd, + BinOr, + CmpGreater, + CmpGreaterEq, + CmpLess, + CmpLessEq, + CmpEq, + CmpNotEq, +} + +#[derive(Debug)] +pub enum Value { + Null, + Bool(bool), + Num(f64), + String(Rc), + Array(Vec), + Object(HashMap), +} diff --git a/src/compile.rs b/src/compile.rs new file mode 100644 index 0000000..db68f80 --- /dev/null +++ b/src/compile.rs @@ -0,0 +1,238 @@ +use crate::ast::{ + Assignment, BinaryOp, BinaryOpKind, Block, Call, Declaration, Expr, FnDecl, Ident, IfStmt, + Literal, Program, Stmt, UnaryOp, WhileStmt, +}; +use crate::bytecode::{FnBlock, Instr, Value}; +use crate::errors::{CompilerError, Span}; +use crate::value::HashMap; + +type CResult = Result; + +#[derive(Debug, Default)] +struct Compiler { + blocks: Vec, + current_block: usize, + /// the current local variables that are in scope, only needed for compiling + locals: Vec>, +} + +pub fn compile(ast: &Program) -> Result, CompileError> { + let mut compiler = Compiler::default(); + + compiler.compile(ast)?; + + Ok(compiler.blocks) +} + +impl Compiler { + fn compile(&mut self, ast: &Program) -> CResult<()> { + let global_block = FnBlock::default(); + self.blocks.push(global_block); + self.current_block = self.blocks.len() - 1; + self.locals.push(HashMap::default()); + self.compile_fn_block(&ast.0)?; + Ok(()) + } + + fn compile_fn_block(&mut self, stmts: &[Stmt]) -> CResult<()> { + for stmt in stmts { + match stmt { + Stmt::Declaration(inner) => self.compile_declaration(inner), + Stmt::Assignment(inner) => self.compile_assignment(inner), + Stmt::FnDecl(inner) => self.compile_fn_decl(inner), + Stmt::If(inner) => self.compile_if(inner), + Stmt::Loop(block, span) => self.compile_loop(block, *span), + Stmt::While(inner) => self.compile_while(inner), + Stmt::Break(span) => self.compile_break(*span), + Stmt::Return(expr, span) => self.compile_return(expr, *span), + Stmt::Block(inner) => self.compile_block(inner), + Stmt::Expr(inner) => self.compile_expr(inner), + }?; + } + + Ok(()) + } + + fn compile_declaration(&mut self, declaration: &Declaration) -> CResult<()> { + // Compile the expression, the result of the expression will be the last thing left on the stack + self.compile_expr(&declaration.init)?; + // Now just remember that the value at this stack location is this variable name + let stack_pos = self.current_stack_top(); + self.locals().insert(declaration.name.clone(), stack_pos); + Ok(()) + } + + fn compile_assignment(&mut self, assignment: &Assignment) -> CResult<()> { + let local = match &assignment.lhs { + Expr::Ident(ident) => ident, + _ => todo!(), + }; + + let stack_pos = self.lookup_local(local)?; + + self.compile_expr(&assignment.rhs)?; + + self.push_instr(Instr::Store(stack_pos), StackChange::Shrink); + + Ok(()) + } + + fn compile_fn_decl(&mut self, _: &FnDecl) -> CResult<()> { + todo!() + } + + fn compile_if(&mut self, _: &IfStmt) -> CResult<()> { + todo!() + } + + fn compile_loop(&mut self, _: &Block, _: Span) -> CResult<()> { + todo!() + } + + fn compile_while(&mut self, _: &WhileStmt) -> CResult<()> { + todo!() + } + + fn compile_break(&mut self, _: Span) -> CResult<()> { + todo!() + } + + fn compile_return(&mut self, _: &Option, _: Span) -> CResult<()> { + todo!() + } + + fn compile_block(&mut self, _: &Block) -> CResult<()> { + todo!() + } + + fn compile_expr(&mut self, expr: &Expr) -> CResult<()> { + match expr { + Expr::Ident(inner) => self.compile_expr_ident(inner), + Expr::Literal(inner) => self.compile_expr_literal(inner), + Expr::UnaryOp(inner) => self.compile_expr_unary(inner), + Expr::BinaryOp(inner) => self.compile_expr_binary(inner), + Expr::Call(inner) => self.compile_expr_call(inner), + } + } + + fn compile_expr_ident(&mut self, name: &Ident) -> CResult<()> { + let offset = self.lookup_local(name)?; + self.push_instr(Instr::Load(offset), StackChange::Grow); + Ok(()) + } + + fn compile_expr_literal(&mut self, lit: &Literal) -> CResult<()> { + let value = match lit { + Literal::String(str, _) => Value::String(str.clone().into()), + Literal::Number(num, _) => Value::Num(*num), + Literal::Array(vec, _) => { + if vec.is_empty() { + Value::Array(Vec::new()) + } else { + todo!() + } + } + Literal::Object(_) => Value::Object(HashMap::default()), + Literal::Boolean(bool, _) => Value::Bool(*bool), + Literal::Null(_) => Value::Null, + }; + + self.push_instr(Instr::PushVal(Box::new(value)), StackChange::Grow); + + Ok(()) + } + + fn compile_expr_unary(&mut self, inner: &UnaryOp) -> CResult<()> { + self.compile_expr(&inner.expr)?; + + // not and neg compile to the same instruction + self.push_instr(Instr::Neg, StackChange::None); + + Ok(()) + } + + fn compile_expr_binary(&mut self, inner: &BinaryOp) -> CResult<()> { + // todo: is this the correct ordering? + self.compile_expr(&inner.lhs)?; + self.compile_expr(&inner.rhs)?; + + let instruction = match inner.kind { + BinaryOpKind::Add => Instr::BinAdd, + BinaryOpKind::And => Instr::BinAnd, + BinaryOpKind::Or => Instr::BinOr, + BinaryOpKind::Equal => Instr::CmpEq, + BinaryOpKind::GreaterEqual => Instr::CmpGreaterEq, + BinaryOpKind::Greater => Instr::CmpGreater, + BinaryOpKind::LessEqual => Instr::CmpLessEq, + BinaryOpKind::Less => Instr::CmpLess, + BinaryOpKind::NotEqual => Instr::CmpNotEq, + BinaryOpKind::Sub => Instr::BinSub, + BinaryOpKind::Mul => Instr::BinMul, + BinaryOpKind::Div => Instr::BinDiv, + BinaryOpKind::Mod => Instr::BinMod, + }; + + self.push_instr(instruction, StackChange::Shrink); + + Ok(()) + } + + fn compile_expr_call(&mut self, _: &Call) -> CResult<()> { + todo!() + } + + fn locals(&mut self) -> &mut HashMap { + self.locals.last_mut().expect("no locals found") + } + + fn lookup_local(&self, name: &Ident) -> CResult { + for locals in self.locals.iter().rev() { + if let Some(&position) = locals.get(name) { + return Ok(position); + } + } + + Err(CompileError) + } + + fn current_stack_top(&self) -> usize { + let block = &self.blocks[self.current_block]; + *block.stack_sizes.last().expect("empty stack") + } + + fn push_instr(&mut self, instr: Instr, stack_change: StackChange) { + let block = &mut self.blocks[self.current_block]; + let stack_top = block.stack_sizes.last().copied().unwrap_or(0); + let new_stack_top = stack_top as isize + stack_change as isize; + assert!(new_stack_top >= 0, "instruction popped stack below 0"); + let new_stack_top = new_stack_top as usize; + + block.code.push(instr); + block.stack_sizes.push(new_stack_top); + } +} + +#[derive(Debug, Copy, Clone)] +#[repr(i8)] +enum StackChange { + Shrink = -1, + None = 0, + Grow = 1, +} + +#[derive(Debug)] +pub struct CompileError; + +impl CompilerError for CompileError { + fn span(&self) -> Span { + todo!() + } + + fn message(&self) -> String { + todo!() + } + + fn note(&self) -> Option { + todo!() + } +} diff --git a/src/lib.rs b/src/lib.rs index 93246cf..f64ee8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,12 @@ +#![deny(clippy::disallowed_type)] + mod ast; -mod bird; +mod bytecode; +mod compile; mod errors; mod lex; mod parse; +mod value; pub use lex::*; pub use parse::*; @@ -15,14 +19,23 @@ pub fn run_program(program: &str) { let tokens = success.into_iter().collect::, _>>().unwrap(); println!( - "{:?}", + "Tokens:\n{:?}\n", tokens.iter().map(|token| &token.kind).collect::>() ); let ast = parse::parse(tokens); match ast { - Ok(ast) => println!("{:?}", ast), + Ok(ast) => { + println!("AST:\n{:?}\n", ast); + + let bytecode = compile::compile(&ast); + + match bytecode { + Ok(code) => println!("Bytecode:\n{:#?}\n", code), + Err(err) => errors::display_error(program, err), + } + } Err(err) => errors::display_error(program, err), } } else { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 1845abb..3d222c2 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -495,7 +495,7 @@ impl<'code> Parser<'code> { TokenType::Ident(name) => { let name_owned = name.to_owned(); Ok(Expr::Ident(Ident { - name: name_owned, + sym: name_owned, span: next.span, })) } @@ -513,7 +513,7 @@ impl<'code> Parser<'code> { TokenType::Ident(name) => { let name_owned = name.to_owned(); Ok(Ident { - name: name_owned, + sym: name_owned, span, }) } diff --git a/src/parse/test.rs b/src/parse/test.rs index 59c75db..4bf72df 100644 --- a/src/parse/test.rs +++ b/src/parse/test.rs @@ -27,7 +27,7 @@ fn num_lit(number: f64) -> Expr { fn ident(name: &str) -> Ident { Ident { - name: name.to_string(), + sym: name.to_string(), span: Default::default(), } } diff --git a/src/value.rs b/src/value.rs new file mode 100644 index 0000000..f716e9a --- /dev/null +++ b/src/value.rs @@ -0,0 +1,9 @@ +/// imagine interning or something here +pub type Symbol = String; + +#[cfg(not(feature = "fxhash"))] +#[allow(clippy::disallowed_type)] +pub type HashMap = std::collections::HashMap; + +#[cfg(feature = "fxhash")] +pub type HashMap = rustc_hash::FxHashMap; diff --git a/test.sl b/test.sl index e0222a7..8e8a2ae 100644 --- a/test.sl +++ b/test.sl @@ -1 +1 @@ -[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[=f \ No newline at end of file +let x = 2 * 3; \ No newline at end of file