more vm and alloc and intern

nora 2021-12-31 14:09:06 +01:00
parent e58e6e3dc4
commit dc26b52bd2
8 changed files with 200 additions and 112 deletions


@@ -4,12 +4,12 @@
//! All AST nodes are bump allocated into the lifetime `'ast`
use crate::errors::Span;
use crate::value::AstSymbol;
use crate::gc::Symbol;
use bumpalo::collections::Vec;
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Ident<'ast> {
pub sym: AstSymbol<'ast>,
pub struct Ident {
pub sym: Symbol,
pub span: Span,
}
@@ -40,7 +40,7 @@ pub enum Stmt<'ast> {
#[derive(Debug, PartialEq)]
pub struct Declaration<'ast> {
pub span: Span,
pub name: Ident<'ast>,
pub name: Ident,
pub init: Expr<'ast>,
}
@@ -54,8 +54,8 @@ pub struct Assignment<'ast> {
#[derive(Debug, PartialEq)]
pub struct FnDecl<'ast> {
pub span: Span,
pub name: Ident<'ast>,
pub params: Vec<'ast, Ident<'ast>>,
pub name: Ident,
pub params: Vec<'ast, Ident>,
pub body: Block<'ast>,
}
@@ -91,7 +91,7 @@ pub struct WhileStmt<'ast> {
#[derive(Debug, PartialEq)]
pub enum Expr<'ast> {
Ident(Ident<'ast>),
Ident(Ident),
Literal(Literal<'ast>),
UnaryOp(&'ast UnaryOp<'ast>),
BinaryOp(&'ast BinaryOp<'ast>),
@@ -180,6 +180,6 @@ pub struct Call<'ast> {
#[derive(Debug, PartialEq)]
pub enum CallKind<'ast> {
Field(Ident<'ast>),
Field(Ident),
Fn(Vec<'ast, Expr<'ast>>),
}


@@ -1,7 +1,8 @@
//! The bytecode that is executed in the vm
use crate::errors::Span;
use crate::value::{HashMap, NewSym};
use crate::gc::Symbol;
use crate::HashMap;
use bumpalo::collections::Vec;
#[derive(Debug)]
@@ -47,5 +48,5 @@ pub enum Value {
Num(f64),
String,
Array,
Object(HashMap<NewSym, Value>),
Object(HashMap<Symbol, Value>),
}
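
A side note on the Object variant above: fields are now keyed by the interned Symbol type instead of by strings, so a field lookup hashes and compares one pointer rather than the character data. A minimal sketch of the caller's view, written as if it sat in a test module of this crate; the module, test and binding names are made up and are not part of the commit:

    #[cfg(test)]
    mod object_value_sketch {
        use crate::bytecode::Value;
        use crate::gc::RtAlloc;
        use crate::HashMap;

        #[test]
        fn object_fields_keyed_by_symbol() {
            // SAFETY: rt outlives every Symbol used as a key below.
            let mut rt = unsafe { RtAlloc::new() };
            let field = rt.intern_string("x");

            let mut fields = HashMap::default();
            fields.insert(field, Value::Num(1.0));
            let object = Value::Object(fields);

            // Symbol is Copy, so the same handle is reused for the lookup;
            // Hash and PartialEq on Symbol only touch the pointer address.
            if let Value::Object(map) = &object {
                assert!(matches!(map.get(&field), Some(Value::Num(_))));
            }
        }
    }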


@@ -6,7 +6,8 @@ use crate::ast::{
};
use crate::bytecode::{FnBlock, Instr, Value};
use crate::errors::{CompilerError, Span};
use crate::value::HashMap;
use crate::gc::Symbol;
use crate::HashMap;
use bumpalo::collections::Vec;
use bumpalo::Bump;
use std::cell::RefCell;
@@ -16,14 +17,14 @@ type CResult<T> = Result<T, CompilerError>;
#[derive(Debug, Default)]
struct Env<'ast> {
locals: HashMap<&'ast str, usize>,
locals: HashMap<Symbol, usize>,
outer: Option<Rc<RefCell<Env<'ast>>>>,
}
impl Env<'_> {
fn lookup_local(&self, name: &Ident) -> CResult<usize> {
fn lookup_inner(env: &Env, name: &Ident) -> Option<usize> {
env.locals.get(name.sym).copied().or_else(|| {
env.locals.get(&name.sym).copied().or_else(|| {
env.outer
.as_ref()
.map(|outer| lookup_inner(&outer.borrow(), name))
@@ -32,7 +33,10 @@ impl Env<'_> {
}
lookup_inner(self, name).ok_or_else(|| {
CompilerError::new(name.span, format!("variable {} not found", name.sym))
CompilerError::new(
name.span,
format!("variable {} not found", name.sym.as_str()),
)
})
}
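
The Env change above swaps the &'ast str keys for interned Symbols but keeps the scope-chain shape: each environment owns its locals plus an optional handle to the enclosing one, and lookup walks outward until a binding is found. A standalone sketch of that pattern (plain String keys stand in for Symbol so it compiles on its own; this is an illustration, not the project's code):

    use std::cell::RefCell;
    use std::collections::HashMap;
    use std::rc::Rc;

    #[derive(Default)]
    struct Env {
        locals: HashMap<String, usize>,
        outer: Option<Rc<RefCell<Env>>>,
    }

    fn lookup(env: &Env, name: &str) -> Option<usize> {
        // Check the innermost scope first, then recurse into the outer one.
        env.locals.get(name).copied().or_else(|| {
            env.outer
                .as_ref()
                .and_then(|outer| lookup(&outer.borrow(), name))
        })
    }

    fn main() {
        let global = Rc::new(RefCell::new(Env::default()));
        global.borrow_mut().locals.insert("x".into(), 0);

        let inner = Env {
            locals: HashMap::new(),
            outer: Some(global),
        };
        assert_eq!(lookup(&inner, "x"), Some(0));
        assert_eq!(lookup(&inner, "y"), None);
    }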


@@ -1,30 +1,12 @@
#![allow(dead_code)]
use crate::HashSet;
use std::collections::LinkedList;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::ptr::NonNull;
/// imagine interning or something here
pub type AstSymbol<'ast> = &'ast str;
/// here is the actual interning or something
pub type NewSym = Gc<str>;
#[cfg(not(feature = "fxhash"))]
#[allow(clippy::disallowed_type)]
pub type HashMap<K, V> = std::collections::HashMap<K, V>;
#[cfg(feature = "fxhash")]
pub type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
#[cfg(not(feature = "fxhash"))]
#[allow(clippy::disallowed_type)]
pub type HashSet<T> = std::collections::HashSet<T>;
#[cfg(feature = "fxhash")]
pub type HashSet<T> = rustc_hash::FxHashSet<T>;
/// A pointer to a garbage collected value. This pointer *must* always be valid, and a value
/// is only allowed to be freed once no Gc is pointing at it anymore. This is achieved through
/// tracing through all objects from a few known roots and marking every reachable value. All other
@@ -59,35 +41,95 @@ impl<T: ?Sized> Clone for Gc<T> {
impl<T: ?Sized> Copy for Gc<T> {}
enum Object {
/// An interned String. Hashing and Equality are O(1) and just look at the pointer address
#[derive(Debug, Clone, Copy)]
pub struct Symbol {
gc: Gc<str>,
}
impl Symbol {
pub fn new(gc: Gc<str>) -> Self {
Self { gc }
}
fn address(&self) -> usize {
self.gc.ptr.as_ptr() as *mut u8 as usize
}
pub fn as_str(&self) -> &str {
self.gc.deref()
}
}
impl Hash for Symbol {
fn hash<H: Hasher>(&self, state: &mut H) {
self.address().hash(state);
}
}
impl PartialEq for Symbol {
fn eq(&self, other: &Self) -> bool {
self.address() == other.address()
}
}
impl Eq for Symbol {}
impl Deref for Symbol {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
#[derive(Debug)]
struct Object {
kind: ObjectKind,
}
#[derive(Debug)]
enum ObjectKind {
String(Gc<str>),
}
#[derive(Debug)]
pub struct RtAlloc {
symbols: HashSet<NonNull<str>>,
objects: LinkedList<Object>,
}
impl RtAlloc {
pub fn alloc_str(&mut self, str: &str) -> Gc<str> {
/// # Safety
/// Promise to not forget to mark any roots and to not deref `Gc<T>` after you've dropped me 🥺
pub unsafe fn new() -> Self {
Self {
symbols: HashSet::default(),
objects: LinkedList::new(),
}
}
fn alloc_str(&mut self, str: &str) -> Gc<str> {
let ptr = Box::into_raw(str.to_owned().into_boxed_str());
// SAFETY: Box cannot be null
let new_nonnull = unsafe { NonNull::new_unchecked(ptr) };
let gc = Gc { ptr: new_nonnull };
let object = Object::String(gc.clone());
let object = Object {
kind: ObjectKind::String(gc.clone()),
};
self.objects.push_back(object);
gc
}
pub fn intern_string(&mut self, str: &str) -> NewSym {
pub fn intern_string(&mut self, str: &str) -> Symbol {
let original_nonnull = NonNull::from(str);
if let Some(interned) = self.symbols.get(&original_nonnull) {
return Gc { ptr: *interned };
return Symbol::new(Gc { ptr: *interned });
}
self.alloc_str(str)
Symbol::new(self.alloc_str(str))
}
}
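
To summarize what this file now provides: RtAlloc hands out Gc<str> allocations, and Symbol wraps one of those so that hashing and equality only look at the pointer address, with Deref giving back the underlying str. A minimal sketch of that API, written as a hypothetical in-crate test (module, test and binding names are made up) that only uses calls visible in this hunk:

    #[cfg(test)]
    mod symbol_sketch {
        use crate::gc::RtAlloc;

        #[test]
        fn symbol_basics() {
            // SAFETY: rt outlives every Symbol created below, and nothing is
            // dereferenced after it is dropped, as RtAlloc::new asks.
            let mut rt = unsafe { RtAlloc::new() };

            let hello = rt.intern_string("hello");
            let copy = hello; // Symbol is Copy: both handles share one allocation

            // Equality and hashing go through the pointer address, so comparing
            // two handles to the same allocation never walks the string bytes.
            assert_eq!(hello, copy);
            assert_eq!(hello.as_str(), "hello");
            assert_eq!(&*hello, "hello"); // via the Deref<Target = str> impl
        }
    }

The backing string memory stays tracked through RtAlloc's objects list, which is why the unsafe constructor asks callers not to use any Gc or Symbol after the allocator is gone.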


@@ -5,6 +5,8 @@
//! is an iterator, and can therefore be used without any allocations
use crate::errors::{CompilerError, Span};
use crate::gc::Symbol;
use crate::RtAlloc;
use std::iter::Peekable;
use std::str::CharIndices;
@@ -13,26 +15,26 @@ use std::str::CharIndices;
///
/// For example `for`, `"hello"`, `main` or `.`
#[derive(Debug, Clone)]
pub struct Token<'code> {
pub struct Token {
pub span: Span,
pub kind: TokenKind<'code>,
pub kind: TokenKind,
}
impl<'code> Token<'code> {
fn single_span(start: usize, kind: TokenKind<'code>) -> Token<'code> {
impl Token {
fn single_span(start: usize, kind: TokenKind) -> Token {
Self {
span: Span::single(start),
kind,
}
}
fn new(span: Span, kind: TokenKind<'code>) -> Token<'code> {
fn new(span: Span, kind: TokenKind) -> Token {
Self { span, kind }
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind<'code> {
pub enum TokenKind {
// keywords
Let,
Print,
@@ -51,10 +53,10 @@ pub enum TokenKind<'code> {
Or,
Not,
// literals
String(String),
String(Symbol),
Number(f64),
// ident
Ident(&'code str),
Ident(Symbol),
// punctuation
/// ;
Semi,
@@ -103,17 +105,19 @@ pub enum TokenKind<'code> {
Error(Box<CompilerError>),
}
#[derive(Debug, Clone)]
pub struct Lexer<'code> {
#[derive(Debug)]
pub struct Lexer<'code, 'gc> {
code: Peekable<CharIndices<'code>>,
src: &'code str,
rt_alloc: &'gc mut RtAlloc,
}
impl<'code> Lexer<'code> {
pub fn new(code: &'code str) -> Self {
impl<'code, 'gc> Lexer<'code, 'gc> {
pub fn new(code: &'code str, rt_alloc: &'gc mut RtAlloc) -> Self {
Self {
code: code.char_indices().peekable(),
src: code,
rt_alloc,
}
}
@@ -127,10 +131,10 @@ impl<'code> Lexer<'code> {
fn maybe_next_char<'a>(
&mut self,
expect_char: char,
true_type: TokenKind<'a>,
false_type: TokenKind<'a>,
true_type: TokenKind,
false_type: TokenKind,
start: usize,
) -> Token<'a> {
) -> Token {
if self.expect(expect_char) {
let _ = self.code.next(); // consume first one
Token {
@@ -144,10 +148,32 @@ impl<'code> Lexer<'code> {
}
}
}
fn keyword_or_ident(&mut self, name: &str) -> TokenKind {
match name {
"loop" => TokenKind::Loop,
"let" => TokenKind::Let,
"fn" => TokenKind::Fn,
"for" => TokenKind::For,
"false" => TokenKind::False,
"if" => TokenKind::If,
"else" => TokenKind::Else,
"while" => TokenKind::While,
"break" => TokenKind::Break,
"return" => TokenKind::Return,
"true" => TokenKind::True,
"null" => TokenKind::Null,
"not" => TokenKind::Not,
"and" => TokenKind::And,
"or" => TokenKind::Or,
"print" => TokenKind::Print,
_ => TokenKind::Ident(self.rt_alloc.intern_string(name)),
}
}
}
impl<'code> Iterator for Lexer<'code> {
type Item = Token<'code>;
impl<'code, 'gc> Iterator for Lexer<'code, 'gc> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
let token = loop {
@@ -244,7 +270,10 @@ impl<'code> Iterator for Lexer<'code> {
}
}
};
break Token::new(Span::start_end(start, end), TokenKind::String(buffer));
break Token::new(
Span::start_end(start, end),
TokenKind::String(self.rt_alloc.intern_string(&buffer)),
);
}
char => {
if char.is_ascii_digit() {
@@ -295,7 +324,7 @@ impl<'code> Iterator for Lexer<'code> {
};
break Token::new(
Span::start_end(start, end),
keyword_or_ident(&self.src[start..end]),
self.keyword_or_ident(&self.src[start..end]),
);
} else {
break Token::new(
@@ -316,28 +345,6 @@ impl<'code> Iterator for Lexer<'code> {
}
}
fn keyword_or_ident(name: &str) -> TokenKind {
match name {
"loop" => TokenKind::Loop,
"let" => TokenKind::Let,
"fn" => TokenKind::Fn,
"for" => TokenKind::For,
"false" => TokenKind::False,
"if" => TokenKind::If,
"else" => TokenKind::Else,
"while" => TokenKind::While,
"break" => TokenKind::Break,
"return" => TokenKind::Return,
"true" => TokenKind::True,
"null" => TokenKind::Null,
"not" => TokenKind::Not,
"and" => TokenKind::And,
"or" => TokenKind::Or,
"print" => TokenKind::Print,
_ => TokenKind::Ident(name),
}
}
fn is_valid_ident_part(char: char) -> bool {
char.is_alphanumeric() || char == '_'
}
@@ -346,7 +353,7 @@ fn is_valid_ident_start(char: char) -> bool {
char.is_alphabetic() || char == '_'
}
#[cfg(test)]
#[cfg(test_ignore)]
mod test {
use crate::lex::Lexer;
use crate::lex::TokenKind::{self, *};
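
Dropping the 'code lifetime from Token means the lexer has to intern identifiers and string literals as it produces them, which is why Lexer::new now also takes a mutable borrow of the runtime allocator. A rough in-crate usage sketch; the source snippet, function name and assertions are illustrative, not taken from the commit:

    use crate::gc::RtAlloc;
    use crate::lex::{Lexer, TokenKind};

    fn lex_a_snippet() {
        // SAFETY: rt outlives every Symbol the lexer hands out below.
        let mut rt = unsafe { RtAlloc::new() };

        let kinds: Vec<TokenKind> = Lexer::new("let answer = 42;", &mut rt)
            .map(|token| token.kind)
            .collect();

        // "let" stays a keyword variant; "answer" comes back as an interned Symbol.
        assert!(matches!(&kinds[0], TokenKind::Let));
        assert!(matches!(&kinds[1], TokenKind::Ident(sym) if sym.as_str() == "answer"));
    }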


@@ -4,29 +4,48 @@ mod ast;
mod bytecode;
mod compile;
mod errors;
mod gc;
mod lex;
mod parse;
mod value;
mod vm;
use crate::ast::Program;
use crate::gc::RtAlloc;
pub use bumpalo::Bump;
pub use lex::*;
pub use parse::*;
#[cfg(not(feature = "fxhash"))]
#[allow(clippy::disallowed_type)]
type HashMap<K, V> = std::collections::HashMap<K, V>;
#[cfg(feature = "fxhash")]
type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
#[cfg(not(feature = "fxhash"))]
#[allow(clippy::disallowed_type)]
type HashSet<T> = std::collections::HashSet<T>;
#[cfg(feature = "fxhash")]
type HashSet<T> = rustc_hash::FxHashSet<T>;
pub fn run_program(program: &str) {
let ast_alloc = Bump::new();
let lexer = lex::Lexer::new(program);
// SAFETY: I will try to 🥺
let mut runtime = unsafe { RtAlloc::new() };
let lexer = lex::Lexer::new(program, &mut runtime);
let ast = parse::parse(lexer, &ast_alloc);
match ast {
Ok(ast) => process_ast(program, ast),
Ok(ast) => process_ast(program, ast, runtime),
Err(err) => errors::display_error(program, err),
}
}
fn process_ast(program: &str, ast: Program) {
fn process_ast(program: &str, ast: Program, runtime: RtAlloc) {
println!("AST:\n{:?}\n", ast);
let bytecode_alloc = Bump::new();
@@ -34,7 +53,11 @@ fn process_ast(program: &str, ast: Program) {
let bytecode = compile::compile(&ast, &bytecode_alloc);
match bytecode {
Ok(code) => println!("Bytecode:\n{:#?}\n", code),
Ok(code) => {
println!("Bytecode:\n{:#?}\n", code);
let _result_lol = vm::execute(&code, runtime);
}
Err(err) => errors::display_error(program, err),
}
}
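
The HashMap/HashSet aliases moved into this file so the whole crate can switch hashers through the fxhash feature without touching call sites. A hypothetical helper (not part of the commit), written as if it lived here where the aliases are in scope, showing how code stays backend-agnostic by only naming the alias:

    // Counts word occurrences; with the fxhash feature enabled this is an
    // FxHashMap, otherwise a std HashMap, and the body does not care which.
    fn count_words(text: &str) -> HashMap<&str, usize> {
        let mut counts = HashMap::default();
        for word in text.split_whitespace() {
            *counts.entry(word).or_insert(0) += 1;
        }
        counts
    }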


@@ -14,10 +14,9 @@ use bumpalo::Bump;
use std::iter::Peekable;
#[derive(Debug)]
struct Parser<'code, 'ast, I>
struct Parser<'ast, I>
where
I: Iterator<Item = Token<'code>>,
I: 'code,
I: Iterator<Item = Token>,
{
tokens: Peekable<I>,
depth: usize,
@@ -26,8 +25,8 @@ where
bump: &'ast Bump,
}
pub fn parse<'ast, 'code>(
tokens: impl Iterator<Item = Token<'code>> + 'code,
pub fn parse<'lexer, 'ast>(
tokens: impl Iterator<Item = Token> + 'lexer,
ast_bump: &'ast Bump,
) -> Result<Program<'ast>, CompilerError> {
let mut parser = Parser {
@@ -72,10 +71,9 @@ macro_rules! enter_parse {
};
}
impl<'code, 'ast, I> Parser<'code, 'ast, I>
impl<'ast, I> Parser<'ast, I>
where
I: Iterator<Item = Token<'code>>,
I: 'code,
I: Iterator<Item = Token>,
{
const MAX_DEPTH: usize = 100;
@@ -189,7 +187,7 @@ where
}))
}
fn fn_args(&mut self) -> ParseResult<Vec<'ast, Ident<'ast>>> {
fn fn_args(&mut self) -> ParseResult<Vec<'ast, Ident>> {
enter_parse!(self);
self.expect(TokenKind::ParenO)?;
@@ -542,8 +540,8 @@ where
let _ = self.expect(TokenKind::ParenC)?;
Ok(expr)
}
TokenKind::Ident(name) => Ok(Expr::Ident(Ident {
sym: self.bump.alloc_str(name),
TokenKind::Ident(sym) => Ok(Expr::Ident(Ident {
sym,
span: next.span,
})),
TokenKind::Error(error) => Err(*error),
@@ -556,17 +554,14 @@ where
return_expr
}
fn ident(&mut self) -> ParseResult<Ident<'ast>> {
fn ident(&mut self) -> ParseResult<Ident> {
enter_parse!(self);
let Token { kind, span } = self
.next()
.ok_or_else(|| CompilerError::eof("identifier"))?;
let return_expr = match kind {
TokenKind::Ident(name) => Ok(Ident {
sym: self.bump.alloc_str(name),
span,
}),
TokenKind::Ident(sym) => Ok(Ident { sym, span }),
TokenKind::Error(error) => Err(*error),
_ => {
return Err(CompilerError::new(
@@ -602,11 +597,7 @@ where
return_expr
}
fn parse_list<T, F>(
&mut self,
close: TokenKind<'code>,
mut parser: F,
) -> ParseResult<Vec<'ast, T>>
fn parse_list<T, F>(&mut self, close: TokenKind, mut parser: F) -> ParseResult<Vec<'ast, T>>
where
F: FnMut(&mut Self) -> ParseResult<T>,
{
@@ -645,19 +636,19 @@ where
// token helpers
fn next(&mut self) -> Option<Token<'code>> {
fn next(&mut self) -> Option<Token> {
self.tokens.next()
}
fn peek(&mut self) -> Option<&Token<'code>> {
fn peek(&mut self) -> Option<&Token> {
self.tokens.peek()
}
fn peek_kind(&mut self) -> Option<&TokenKind<'code>> {
fn peek_kind(&mut self) -> Option<&TokenKind> {
self.peek().map(|token| &token.kind)
}
fn expect(&mut self, kind: TokenKind<'code>) -> ParseResult<Token> {
fn expect(&mut self, kind: TokenKind) -> ParseResult<Token> {
if let Some(token) = self.next() {
if token.kind == kind {
Ok(token)

src/vm.rs (new file, 20 lines added)

@@ -0,0 +1,20 @@
use crate::bytecode::FnBlock;
use crate::gc::RtAlloc;
type VmResult = Result<(), ()>;
pub fn execute<'bc>(bytecode: &'bc [FnBlock<'bc>], alloc: RtAlloc) -> Result<(), ()> {
let _vm = Vm {
blocks: bytecode,
current: bytecode.first().ok_or(())?,
alloc,
};
Ok(())
}
struct Vm<'bc> {
blocks: &'bc [FnBlock<'bc>],
current: &'bc FnBlock<'bc>,
alloc: RtAlloc,
}
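
And a tiny smoke-test sketch for the new entry point, written as if it lived next to vm.rs in this crate (the module and test names are made up); it only relies on what the stub already does, namely that an empty block slice has no first element, so execute bails out with Err(()):

    #[cfg(test)]
    mod vm_sketch {
        use crate::gc::RtAlloc;
        use crate::vm;

        #[test]
        fn empty_bytecode_is_an_error() {
            // SAFETY: no Gc values are created here, and the runtime is simply
            // moved into execute.
            let rt = unsafe { RtAlloc::new() };

            // bytecode.first().ok_or(())? has nothing to return for an empty slice.
            assert_eq!(vm::execute(&[], rt), Err(()));
        }
    }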