diff --git a/Cargo.lock b/Cargo.lock index f3f53ea..de1588b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,6 +44,7 @@ name = "analysis" version = "0.1.0" dependencies = [ "bumpalo", + "either", "indexmap", "parser", "rustc-hash", @@ -163,6 +164,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + [[package]] name = "encode_unicode" version = "0.3.6" diff --git a/analysis/Cargo.toml b/analysis/Cargo.toml index 575e6b3..a552838 100644 --- a/analysis/Cargo.toml +++ b/analysis/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] bumpalo = "3.10.0" +either = "1.8.1" indexmap = "1.9.1" parser = { path = "../parser" } rustc-hash = "1.1.0" diff --git a/analysis/src/ctxt.rs b/analysis/src/ctxt.rs index f61d0e0..5da7966 100644 --- a/analysis/src/ctxt.rs +++ b/analysis/src/ctxt.rs @@ -1,7 +1,10 @@ -use std::cell::{Cell, RefCell}; +use std::{ + cell::{Cell, RefCell}, + fmt::Debug, +}; use parser::{ - ast::{self, IntTyKind}, + ast::{self, IntSign, IntTyKind, IntTy}, Symbol, }; use rustc_hash::{FxHashMap, FxHashSet}; @@ -11,26 +14,84 @@ use crate::{ ty::{Ty, TyKind}, }; -#[derive(Debug)] pub struct LoweringCx<'cx> { tys: RefCell>>, layouts: RefCell>, string_literals: RefCell>, pub(crate) arena: &'cx bumpalo::Bump, next_def_id: Cell, + pub types: CommonTypes<'cx>, /**/ pub(crate) global_decls: FxHashMap>, } +impl Debug for LoweringCx<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("lcx") + } +} + +pub struct CommonInt<'cx> { + pub signed: Ty<'cx>, + pub unsigned: Ty<'cx>, +} + +pub struct CommonTypes<'cx> { + pub void: Ty<'cx>, + pub char: Ty<'cx>, + pub su_char: CommonInt<'cx>, + pub short: CommonInt<'cx>, + pub int: CommonInt<'cx>, + pub long: CommonInt<'cx>, +} + +pub(crate) fn intern_ty_inner<'cx>( + tys: &RefCell>>, + arena: &'cx bumpalo::Bump, + kind: TyKind<'cx>, +) -> Ty<'cx> { + let opt_kind = tys.borrow().get(&kind).copied(); + match opt_kind { + Some(ty) => Ty::new_unchecked(ty), + None => { + let kind = arena.alloc(kind); + tys.borrow_mut().insert(kind); + Ty::new_unchecked(kind) + } + } +} + +impl<'cx> CommonTypes<'cx> { + fn new(tys: &RefCell>>, arena: &'cx bumpalo::Bump) -> Self { + let int = |sign, kind| intern_ty_inner(tys, arena, TyKind::Int(IntTy(sign, kind))); + let int_pair = |kind| CommonInt { + signed: int(IntSign::Signed, kind), + unsigned: int(IntSign::Unsigned, kind), + }; + + Self { + void: intern_ty_inner(tys, arena, TyKind::Void), + char: intern_ty_inner(tys, arena, TyKind::Char), + su_char: int_pair(IntTyKind::Char), + short: int_pair(IntTyKind::Short), + int: int_pair(IntTyKind::Int), + long: int_pair(IntTyKind::Long), + } + } +} + impl<'cx> LoweringCx<'cx> { pub fn new(arena: &'cx bumpalo::Bump) -> Self { + let tys = RefCell::default(); + let types = CommonTypes::new(&tys, arena); LoweringCx { - tys: RefCell::default(), + tys, layouts: RefCell::default(), string_literals: RefCell::default(), arena, next_def_id: Cell::new(DefId(0)), global_decls: FxHashMap::default(), + types, } } @@ -53,15 +114,7 @@ impl<'cx> LoweringCx<'cx> { } pub(crate) fn intern_ty(&self, kind: TyKind<'cx>) -> Ty<'cx> { - let opt_kind = self.tys.borrow().get(&kind).copied(); - match opt_kind { - Some(ty) => Ty::new_unchecked(ty), - None => { - let kind = self.arena.alloc(kind); - self.tys.borrow_mut().insert(kind); - Ty::new_unchecked(kind) - } - } + intern_ty_inner(&self.tys, self.arena, kind) } fn intern_layout(&self, layout: Layout) -> &'cx Layout { diff --git a/analysis/src/ir.rs b/analysis/src/ir.rs index 87e96d0..440d8dd 100644 --- a/analysis/src/ir.rs +++ b/analysis/src/ir.rs @@ -31,11 +31,19 @@ //! call yeet(%val) //! ``` +mod custom; +mod info; mod pretty; mod validate; +mod visit; + +#[doc(hidden)] +pub use custom::help as custom_help; use std::fmt::{Debug, Display}; +pub use custom::define_ir_func; +use either::Either; use parser::{ast, Span, Symbol}; pub use pretty::{func_to_string, ir_to_string}; use rustc_hash::FxHashMap; @@ -74,6 +82,13 @@ pub struct Layout { pub align: u64, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Location { + pub bb: BbIdx, + /// None means the terminator. + pub stmt: Option, +} + pub struct Ir<'cx> { pub funcs: FxHashMap>, } @@ -89,7 +104,7 @@ pub struct Func<'cx> { pub arity: usize, } -#[derive(Clone, Copy)] +#[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct BbIdx(pub u32); #[derive(Debug, Clone)] @@ -116,7 +131,7 @@ pub struct Statement { #[derive(Debug, Clone)] pub enum StatementKind { Alloca { - reg: Register, + result: Register, size: Operand, align: Operand, }, @@ -133,19 +148,19 @@ pub enum StatementKind { align: Operand, }, BinOp { + result: Register, kind: BinKind, lhs: Operand, rhs: Operand, - result: Register, }, UnaryOperation { - rhs: Operand, - kind: UnaryKind, result: Register, + kind: UnaryKind, + rhs: Operand, }, PtrOffset { result: Register, - reg: Register, + ptr: Operand, amount: Operand, }, Call { @@ -209,6 +224,9 @@ pub enum ConstValue { } impl Func<'_> { + pub fn bb(&self, i: BbIdx) -> &BasicBlock { + &self.bbs[i.as_usize()] + } pub fn bb_mut(&mut self, i: BbIdx) -> &mut BasicBlock { &mut self.bbs[i.as_usize()] } @@ -219,7 +237,13 @@ impl BbIdx { Self(n.try_into().unwrap()) } pub fn as_usize(self) -> usize { - self.0 as _ + self.0.try_into().unwrap() + } +} + +impl Register { + pub fn as_usize(self) -> usize { + self.0.try_into().unwrap() } } @@ -265,4 +289,12 @@ impl Branch { pub fn dummy() -> Self { Branch::Goto(BbIdx(u32::MAX)) } + + pub fn successors(&self) -> impl Iterator { + match self { + Branch::Goto(bb) => Either::Left(Some(*bb).into_iter()), + Branch::Switch { cond: _, yes, no } => Either::Right([*yes, *no].into_iter()), + Branch::Ret(_) => Either::Left(None.into_iter()), + } + } } diff --git a/analysis/src/ir/custom.rs b/analysis/src/ir/custom.rs new file mode 100644 index 0000000..3db0943 --- /dev/null +++ b/analysis/src/ir/custom.rs @@ -0,0 +1,100 @@ +pub mod help { + use crate::ir::{Operand, Register, ConstValue}; + + pub trait AsOperand { + fn as_operand(self) -> Operand; + } + + pub fn op(o: impl AsOperand) -> Operand { + o.as_operand() + } + + impl AsOperand for Register { + fn as_operand(self) -> Operand { + Operand::Reg(self) + } + } + + impl AsOperand for u64 { + fn as_operand(self) -> Operand { + Operand::Const(ConstValue::u64(self)) + } + } +} + +#[macro_export] +macro_rules! define_ir_func { + (@bbs($bb:ident) { $( $stmt:expr );* }) => { + $( + let s = $crate::ir::Statement { + span: ::parser::Span::dummy(), + kind: { + #[allow(unused_imports)] + use $crate::ir::{StatementKind::*, Register, custom_help::*, BinKind}; + $stmt + }, + }; + $bb.statements.push(s); + )* + }; + (@body($f:ident) { $($num:literal : { $( $stmt:expr );* $(;)? => $branch:expr })* }) => { + $( + assert_eq!($f.bbs.len(), $num); + #[allow(unused_mut)] + let mut bb = $crate::ir::BasicBlock { + statements: Vec::new(), + term: { + #[allow(unused_imports)] + use $crate::ir::{Register, custom_help::*, Branch::*}; + $branch + }, + }; + $crate::ir::define_ir_func! { @bbs(bb) { $($stmt);* } }; + $f.bbs.push(bb); + )* + }; + + // entrypoint + ( + def($lcx:ident) $name:ident (), regs($regs:literal) { + $($body:tt)* + } + ) => {{ + let mut f = $crate::ir::Func { + name: ::parser::Symbol::intern(stringify!($name)), + def_span: ::parser::Span::dummy(), + arity: 0, + ret_ty: $lcx.intern_ty($crate::ty::TyKind::Void), + bbs: Vec::new(), + regs: vec![$crate::ir::RegisterData { + tyl: $lcx.layout_of($lcx.types.int.unsigned), + name: None, + }; $regs], + }; + $crate::ir::define_ir_func! { @body(f) { $($body)* } }; + $crate::ir::validate(&f); + f + }}; +} + +pub use define_ir_func; + +#[cfg(test)] +mod tests { + use crate::LoweringCx; + + #[test] + fn define() { + let arena = bumpalo::Bump::new(); + let lcx = LoweringCx::new(&arena); + + let _f = define_ir_func! { + def(lcx) name (), regs(1) { + 0: { + BinOp { result: Register(0), kind: BinKind::Add, lhs: op(0), rhs: op(2) }; + => Ret(op(Register(0))) + } + } + }; + } +} diff --git a/analysis/src/ir/info.rs b/analysis/src/ir/info.rs new file mode 100644 index 0000000..0d581a1 --- /dev/null +++ b/analysis/src/ir/info.rs @@ -0,0 +1,145 @@ +use rustc_hash::FxHashSet; + +use crate::ir::visit::Visitor; + +use super::{BbIdx, Branch, Func, Location, Operand}; + +pub fn traverse_postorder<'a>(func: &'a Func<'_>) -> Vec { + // the final traversial, backwards. + // the starting bb has to be visited last. + let mut traversal = vec![BbIdx(0)]; + let mut i = 0; + let mut seen = FxHashSet::default(); + + seen.insert(BbIdx(0)); + + while let Some(&next) = traversal.get(i) { + let successors = func.bb(next).term.successors(); + for succ in successors.filter(|&s| seen.insert(s)) { + traversal.push(succ); + } + + i += 1; + } + + traversal.reverse(); + traversal +} + +/// The last usage of each SSA register. After that location, the SSA register is not used anymore +/// and can be discarded. Registers with `None` are never used. +pub fn last_register_uses(func: &Func<'_>) -> Vec> { + // TODO: This does not work when registers are used after backedges. + // When mem2reg/stack2ssa is implemented, this will be very bad. Right now, it's totally fine! + let mut uses = vec![None; func.regs.len()]; + + for bb in traverse_postorder(func) { + let uses = &mut *uses; + + let mut check_op = |op: Operand, stmt| match op { + Operand::Reg(reg) => { + dbg!(("reg use", reg, stmt)); + if uses[reg.as_usize()].is_none() { + dbg!("insert!"); + uses[reg.as_usize()] = Some(Location { bb, stmt }) + } + } + Operand::Const(_) => {} + }; + + if let Branch::Ret(op) = func.bb(bb).term { + check_op(op, None); + } + for (i, stmt) in func.bb(bb).statements.iter().enumerate() { + let check_op = |op| { + check_op(op, Some(i)); + }; + + struct StmtVisitor { + check_op: F, + } + impl Visitor for StmtVisitor { + fn visit_operand(&mut self, op: Operand) { + (self.check_op)(op); + } + } + StmtVisitor { check_op }.visit_statement(stmt); + } + } + + uses +} + +#[cfg(test)] +mod tests { + use crate::{ + define_ir_func, + ir::{BbIdx, Location}, + LoweringCx, + }; + + #[test] + fn postorder_graph() { + let arena = bumpalo::Bump::new(); + let lcx = LoweringCx::new(&arena); + + let f = define_ir_func! { + def(lcx) name (), regs(0) { + 0: { + => Goto(BbIdx(1)) + } + 1: { + => Switch { cond: op(0), yes: BbIdx(2), no: BbIdx(3) } + } + 2: { + => Ret(op(0)) + } + 3: { + => Goto(BbIdx(0)) + } + } + }; + + let traverse = super::traverse_postorder(&f); + + assert_eq!(traverse, vec![BbIdx(3), BbIdx(2), BbIdx(1), BbIdx(0)]); + } + + #[test] + fn single_bb() { + let arena = bumpalo::Bump::new(); + let lcx = LoweringCx::new(&arena); + + let f = define_ir_func! { + // %0 = add 0, 2 + // %1 = add %0, 1 + // %2 = add %0, 2 + // ret %0 + def(lcx) name (), regs(3) { + 0: { + BinOp { kind: BinKind::Add, lhs: op(0), rhs: op(2), result: Register(0) }; + BinOp { kind: BinKind::Add, lhs: op(Register(0)), rhs: op(1), result: Register(1) }; + BinOp { kind: BinKind::Add, lhs: op(Register(1)), rhs: op(2), result: Register(2) }; + => Ret(op(Register(0))) + } + } + }; + + let uses = super::last_register_uses(&f); + + assert_eq!( + uses, + vec![ + Some(Location { + bb: BbIdx(0), + stmt: None + }), + Some(Location { + bb: BbIdx(0), + stmt: Some(2) + }), + None, + ] + ); + } +} diff --git a/analysis/src/ir/pretty.rs b/analysis/src/ir/pretty.rs index 65371d1..22a416a 100644 --- a/analysis/src/ir/pretty.rs +++ b/analysis/src/ir/pretty.rs @@ -65,7 +65,7 @@ impl PrettyPrinter { for stmt in &bb.statements { match stmt.kind { - StatementKind::Alloca { reg, size, align } => { + StatementKind::Alloca { result: reg, size, align } => { writeln!( self.out, " {} = alloca, size={}, align={}", @@ -145,13 +145,13 @@ impl PrettyPrinter { ), StatementKind::PtrOffset { result, - reg, + ptr: reg, amount, } => writeln!( self.out, " {} = ptroffset {}, {}", print_reg(result), - print_reg(reg), + print_op(reg), print_op(amount) ), StatementKind::Call { diff --git a/analysis/src/ir/validate.rs b/analysis/src/ir/validate.rs index 64a3096..abb1aff 100644 --- a/analysis/src/ir/validate.rs +++ b/analysis/src/ir/validate.rs @@ -1,28 +1,45 @@ use rustc_hash::FxHashSet; -use super::{Branch, Ir}; +use super::{visit::Visitor, Branch, Func, Register}; use crate::ir::BbIdx; -pub fn validate(ir: &Ir<'_>) { - for fun in ir.funcs.values() { - for (i, bb) in fun.bbs.iter().enumerate() { - if let Branch::Goto(BbIdx(u32::MAX)) = bb.term { - panic!( - "found dummy term in {} in {}", - BbIdx::from_usize(i), - fun.name - ) - } +pub fn validate(func: &Func<'_>) { + for (i, bb) in func.bbs.iter().enumerate() { + if let Branch::Goto(BbIdx(u32::MAX)) = bb.term { + panic!( + "found dummy term in {} in {}", + BbIdx::from_usize(i), + func.name + ) } + } - let mut reg_names = FxHashSet::default(); - for reg in &fun.regs { - if let Some(name) = reg.name { - let is_new = reg_names.insert(name); - if !is_new { - panic!("register name {name} is used twice"); - } + let mut reg_names = FxHashSet::default(); + for reg in &func.regs { + if let Some(name) = reg.name { + let is_new = reg_names.insert(name); + if !is_new { + panic!("register name {name} is used twice"); } } } + + ValidationVisitor { func }.visit_func(func); +} + +struct ValidationVisitor<'a> { + func: &'a Func<'a>, +} + +impl Visitor for ValidationVisitor<'_> { + fn visit_reg(&mut self, reg: Register) { + if self.func.regs.len() <= reg.as_usize() { + panic!( + "register out of bounds in {}. %{}, {} registers", + self.func.name, + reg.0, + self.func.regs.len() + ); + } + } } diff --git a/analysis/src/ir/visit.rs b/analysis/src/ir/visit.rs new file mode 100644 index 0000000..c37fc60 --- /dev/null +++ b/analysis/src/ir/visit.rs @@ -0,0 +1,103 @@ +use super::{BasicBlock, ConstValue, Func, Operand, Register, Statement, StatementKind}; + +pub trait Visitor { + fn visit_func(&mut self, func: &Func<'_>) { + self.super_func(func); + } + fn visit_bb(&mut self, bb: &BasicBlock) { + self.super_bb(bb); + } + fn visit_statement(&mut self, stmt: &Statement) { + self.super_statement(stmt); + } + fn visit_operand(&mut self, op: Operand) { + self.super_operand(op); + } + fn visit_reg(&mut self, _: Register) {} + fn visit_const(&mut self, _: ConstValue) {} + + fn super_func(&mut self, func: &Func<'_>) { + for bb in &func.bbs { + self.visit_bb(bb); + } + } + + fn super_bb(&mut self, bb: &BasicBlock) { + for stmt in &bb.statements { + self.visit_statement(stmt); + } + } + + fn super_statement(&mut self, stmt: &Statement) { + match stmt.kind { + StatementKind::Alloca { + result, + size, + align, + } => { + self.visit_reg(result); + self.visit_operand(size); + self.visit_operand(align); + }, + StatementKind::Store { + ptr, + value, + size, + align, + } => { + self.visit_operand(ptr); + self.visit_operand(value); + self.visit_operand(size); + self.visit_operand(align); + }, + StatementKind::Load { + result, + ptr, + size, + align, + } => { + self.visit_reg(result); + self.visit_operand(ptr); + self.visit_operand(size); + self.visit_operand(align); + }, + StatementKind::BinOp { + kind: _, + lhs, + rhs, + result, + } => { + self.visit_reg(result); + self.visit_operand(lhs); + self.visit_operand(rhs); + }, + StatementKind::UnaryOperation { rhs, kind: _, result } => { + self.visit_reg(result); + self.visit_operand(rhs); + }, + StatementKind::PtrOffset { + result, + ptr, + amount, + } => { + self.visit_reg(result); + self.visit_operand(ptr); + self.visit_operand(amount); + }, + StatementKind::Call { result, func, ref args } => { + self.visit_reg(result); + self.visit_operand(func); + for &arg in args { + self.visit_operand(arg); + } + }, + } + } + + fn super_operand(&mut self, op: Operand) { + match op { + Operand::Reg(reg) => self.visit_reg(reg), + Operand::Const(c) => self.visit_const(c), + } + } +} diff --git a/analysis/src/lower.rs b/analysis/src/lower.rs index 703de1f..5e77ed0 100644 --- a/analysis/src/lower.rs +++ b/analysis/src/lower.rs @@ -2,7 +2,7 @@ mod builder; mod typeck; use parser::{ - ast::{self, ExprBinary, IntSign, IntTy, IntTyKind}, + ast::{self, ExprBinary}, Span, Symbol, }; use rustc_hash::FxHashMap; @@ -68,7 +68,9 @@ pub fn lower_translation_unit<'cx>( } } - ir::validate(&ir); + for func in ir.funcs.values() { + ir::validate(func); + } Ok(ir) } @@ -77,21 +79,6 @@ struct FnLoweringCtxt<'a, 'cx> { scopes: Vec>>, build: FuncBuilder<'a, 'cx>, lcx: &'a LoweringCx<'cx>, - types: CommonTypes<'cx>, -} - -struct CommonInt<'cx> { - signed: Ty<'cx>, - unsigned: Ty<'cx>, -} - -struct CommonTypes<'cx> { - void: Ty<'cx>, - char: Ty<'cx>, - su_char: CommonInt<'cx>, - short: CommonInt<'cx>, - int: CommonInt<'cx>, - long: CommonInt<'cx>, } impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { @@ -250,11 +237,11 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { let op_tyl = match expr { ast::Expr::Atom(ast::Atom::Char(c)) => ( Operand::Const(ConstValue::Int((*c).into())), - self.lcx.layout_of(self.types.char), + self.lcx.layout_of(self.lcx.types.char), ), ast::Expr::Atom(ast::Atom::Int(i)) => ( Operand::Const(ConstValue::Int(*i as _)), - self.lcx.layout_of(self.types.int.signed), + self.lcx.layout_of(self.lcx.types.int.signed), ), ast::Expr::Atom(ast::Atom::Float(_)) => todo!("no floats"), ast::Expr::Atom(ast::Atom::Ident((ident, ident_span))) => { @@ -284,7 +271,7 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { let lit_def_id = self.lcx.intern_str_lit(string); ( Operand::Const(ConstValue::StaticPtr(lit_def_id)), - self.ty_layout(TyKind::Ptr(self.types.char)), + self.ty_layout(TyKind::Ptr(self.lcx.types.char)), ) } ast::Expr::Unary(ast::ExprUnary { @@ -467,7 +454,6 @@ fn lower_func<'cx>( params.len().try_into().unwrap(), ), lcx, - types: CommonTypes::new(lcx), }; for param in params { @@ -523,22 +509,3 @@ fn lower_func<'cx>( Ok(cx.build.finish()) } - -impl<'cx> CommonTypes<'cx> { - fn new(lcx: &LoweringCx<'cx>) -> Self { - let int = |sign, kind| lcx.intern_ty(TyKind::Int(IntTy(sign, kind))); - let int_pair = |kind| CommonInt { - signed: int(IntSign::Signed, kind), - unsigned: int(IntSign::Unsigned, kind), - }; - - Self { - void: lcx.intern_ty(TyKind::Void), - char: lcx.intern_ty(TyKind::Char), - su_char: int_pair(IntTyKind::Char), - short: int_pair(IntTyKind::Short), - int: int_pair(IntTyKind::Int), - long: int_pair(IntTyKind::Long), - } - } -} diff --git a/analysis/src/lower/builder.rs b/analysis/src/lower/builder.rs index 937b49c..c24e3ba 100644 --- a/analysis/src/lower/builder.rs +++ b/analysis/src/lower/builder.rs @@ -55,7 +55,7 @@ impl<'a, 'cx> FuncBuilder<'a, 'cx> { let stmt = Statement { span, kind: StatementKind::Alloca { - reg, + result: reg, size: Operand::Const(ConstValue::u64(layout.size)), align: Operand::Const(ConstValue::u64(layout.align)), }, diff --git a/analysis/src/lower/typeck.rs b/analysis/src/lower/typeck.rs index feae617..66d8974 100644 --- a/analysis/src/lower/typeck.rs +++ b/analysis/src/lower/typeck.rs @@ -148,11 +148,11 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { // §6.3.1.1 Boolean, characters, and integers fn promote(&self, ty: Ty<'cx>, span: Span) -> Result> { Ok(match *ty { - TyKind::Char => smallvec![(Coercion::SignExt, self.types.int.signed)], + TyKind::Char => smallvec![(Coercion::SignExt, self.lcx.types.int.signed)], TyKind::Int(int) if int.1 < IntTyKind::Int => match int.0 { - IntSign::Signed => smallvec![(Coercion::SignExt, self.types.int.signed)], + IntSign::Signed => smallvec![(Coercion::SignExt, self.lcx.types.int.signed)], IntSign::Unsigned => { - smallvec![(Coercion::ZeroExt, self.types.int.unsigned)] + smallvec![(Coercion::ZeroExt, self.lcx.types.int.unsigned)] } }, TyKind::Int(_) => smallvec![], diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index 0525aa4..ad6900c 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -1,4 +1,6 @@ -mod x86; +#![allow(unused)] + +mod x86_64; use std::process::Stdio; @@ -20,7 +22,7 @@ pub fn generate<'cx>(lcx: &'cx LoweringCx<'cx>, ir: &Ir<'cx>) -> Result<()> { let text = obj.add_section(Vec::new(), b".text".to_vec(), object::SectionKind::Text); for (_def_id, func) in &ir.funcs { - let code = x86::generate_func(lcx, func)?; + let code = x86_64::generate_func(lcx, func)?; let offset = obj.append_section_data(text, &code, 8); diff --git a/codegen/src/x86.rs b/codegen/src/x86_64.rs similarity index 58% rename from codegen/src/x86.rs rename to codegen/src/x86_64.rs index fea78b2..95eb509 100644 --- a/codegen/src/x86.rs +++ b/codegen/src/x86_64.rs @@ -1,10 +1,77 @@ +//! Basic codegen for the x86-64 architecture. +//! +//! We use the [`iced_x86`] crate as our assembler. +//! +//! Then, all IR basic blocks and statements are lowered in a straightforward way. +//! No optimizations are done. There is some basic register allocation. +//! +//! # Register allocation +//! +//! Register allocation is not very smart, but also not too stupid. It tries to put SSA +//! registers into machine registers as much as possible. +//! +//! ```text +//! bb0: +//! %0 = 0 +//! %1 = 1 +//! %2 = add %0 %1 +//! switch %2, then bb1, else bb2 +//! +//! bb1: +//! %3 = add %1, 1 +//! +//! bb2: +//! %4 = add %2, 2 +//! ``` +//! +//! For all SSA registers, we establish their "point of last use". This is the bb,stmt where their last usage occurs. +//! +//! First, we establish a list of possible registers to allocate. +//! Since we immediately alloca all parameters, all the param registers are free real estate. +//! Also, `rbx` is always saved on the stack at the start and end. +//! +//! ```text +//! rax, rbx, rdi, rsi, rcx, rdx, r8, r9 +//! ``` +//! +//! This forms our priority list of registers. +//! +//! Every time a statement has a return value, we try to assign that SSA register into a new machine register. +//! For this, we iterate through the register list above and find the first register that's free. If we see a register +//! that is not used anymore at the current location, we throw it out and use that new slot. +//! +//! When codegening an SSA register, we look into a lookup table from SSA register to machine register/stack spill and use that. +//! +//! When the list above is full, we spill the register to the stack. This should be rare. If the register doesn't fit into a machine +//! register, it's also spilled. +//! +//! ## Registers +//! +//! +//! | name | description | callee-saved | +//! | -------- | -------------------- | ------------ | +//! | %rax | temporary register; with variable arguments passes information about the number of vector registers used; 1st return register | No | +//! | %rbx | callee-saved register | Yes | +//! | %rcx | used to pass 4th integer argument to functions | No | +//! | %rdx | used to pass 3rd argument to functions; 2nd return register | No | +//! | %rsp | stack pointer | Yes | +//! | %rbp | callee-saved register; optionally used as frame pointer | Yes | +//! | %rsi | used to pass 2nd argument to functions | No | +//! | %rdi | used to pass 1st argument to functions | No | +//! | %r8 | used to pass 5th argument to functions | No | +//! | %r9 | used to pass 6th argument to functions | No | +//! | %r10 | temporary register, used for passing a function’s static chain pointer | No | +//! | %r11 | temporary register | No | +//! | %r12-r14 | callee-saved registers | Yes | +//! | %r15 | callee-saved register; optionally used as GOT base pointer | Yes | + use analysis::{ - ir::{BbIdx, ConstValue, Func, Operand, Register, Statement, StatementKind}, + ir::{BbIdx, Func, Operand, Register, Statement, StatementKind}, LoweringCx, }; use iced_x86::{ code_asm::{self as x, CodeAssembler}, - IcedError, Instruction, + IcedError, }; use parser::Span; use rustc_hash::FxHashMap; @@ -51,7 +118,7 @@ impl<'cx> AsmCtxt<'cx> { match *kind { StatementKind::Alloca { - reg, + result: reg, size, align: _, } => { @@ -110,7 +177,7 @@ impl<'cx> AsmCtxt<'cx> { StatementKind::UnaryOperation { rhs, kind, result } => todo!(), StatementKind::PtrOffset { result, - reg, + ptr: reg, amount, } => todo!(), StatementKind::Call { diff --git a/notes.md b/notes.md index ddfdea1..7298cfc 100644 --- a/notes.md +++ b/notes.md @@ -53,17 +53,3 @@ def main() { ret 0 } ``` - -```x86asm -sub rbp, 4 ; a -sub rbp, 4 ; %1 -mov rbx, 3 -mul rbx, 4 -mov dword ptr [rbp], rbx -sub rbp, 4 ; %2 -mov rbx, 1 -add rbx, dword ptr [rbp + 4] -mov dword ptr [rbp + 8], dword ptr [rbp] -xor rax, rax -ret -``` \ No newline at end of file diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 64ad9d2..5b098dc 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -32,6 +32,10 @@ impl Span { Self { start, end } } + pub fn dummy() -> Self { + Self::start_end(0, 0) + } + pub fn extend(&self, rhs: Self) -> Self { Self::start_end(self.start, rhs.end) } diff --git a/parser/src/parser/expr.rs b/parser/src/parser/expr.rs index 93010e0..7005f94 100644 --- a/parser/src/parser/expr.rs +++ b/parser/src/parser/expr.rs @@ -1,6 +1,6 @@ //! The expression parser is implemented as a pratt parser. //! -//! For more information, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html +//! For more information, see use crate::{ ast::{