diff --git a/README.md b/README.md index 12a9dee..9d025cb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1 @@ # uwuc - -# Building - -`uwucc` requires a nightly rust compiler because of the following features: -* `let_else` \ No newline at end of file diff --git a/analysis/src/ir.rs b/analysis/src/ir.rs index 8ac98d7..164a449 100644 --- a/analysis/src/ir.rs +++ b/analysis/src/ir.rs @@ -33,7 +33,7 @@ mod custom; pub mod info; -mod pretty; +pub mod pretty; mod validate; mod visit; @@ -299,3 +299,21 @@ impl Branch { } } } + +impl Location { + pub fn start() -> Self { + Self { + bb: BbIdx(0), + stmt: Some(0), + } + } + pub fn terminator(bb: BbIdx) -> Self { + Self { bb, stmt: None } + } + pub fn stmt(bb: BbIdx, stmt: usize) -> Self { + Self { + bb, + stmt: Some(stmt), + } + } +} diff --git a/analysis/src/ir/pretty.rs b/analysis/src/ir/pretty.rs index 4893b1b..1ea52c8 100644 --- a/analysis/src/ir/pretty.rs +++ b/analysis/src/ir/pretty.rs @@ -1,39 +1,63 @@ -use std::fmt::{Display, Formatter, Result, Write}; +use std::{ + cell::Cell, + fmt::{self, Display, Formatter, Result, Write}, +}; -use super::{BbIdx, BinKind, Branch, ConstValue, Func, Ir, Operand, StatementKind, UnaryKind}; +use super::{ + BbIdx, BinKind, Branch, ConstValue, Func, Ir, Location, Operand, StatementKind, UnaryKind, +}; use crate::ir::Register; -pub fn ir_to_string(ir: &Ir<'_>) -> String { +pub fn ir_to_string<'a>(ir: &'a Ir<'a>, custom: &impl Customizer<'a>) -> String { let mut buf = String::new(); - PrettyPrinter { out: &mut buf }.ir(ir).unwrap(); + PrettyPrinter { out: &mut buf }.ir(ir, custom).unwrap(); buf } -pub fn func_to_string(func: &Func<'_>) -> String { +pub fn func_to_string<'a>(func: &'a Func<'a>, custom: &impl Customizer<'a>) -> String { let mut buf = String::new(); - PrettyPrinter { out: &mut buf }.func(func).unwrap(); + PrettyPrinter { out: &mut buf }.func(func, custom).unwrap(); buf } +pub trait Customizer<'a> { + fn start_func(&self, func: &'a Func<'a>); + fn fmt_reg(&self, reg: Register, f: &mut fmt::Formatter<'_>, loc: Location) -> fmt::Result; +} + +#[derive(Default)] +pub struct DefaultCustomizer<'a>(Cell>>); + +impl<'a> Customizer<'a> for DefaultCustomizer<'a> { + fn start_func(&self, func: &'a Func<'a>) { + self.0.set(Some(func)); + } + + fn fmt_reg(&self, reg: Register, f: &mut fmt::Formatter<'_>, loc: Location) -> fmt::Result { + match self.0.get().unwrap().regs[reg.0 as usize].name { + None => write!(f, "%{}", reg.0), + Some(name) => write!(f, "%{name}"), + } + } +} + pub struct PrettyPrinter { out: W, } impl PrettyPrinter { - pub fn ir(&mut self, ir: &Ir<'_>) -> Result { + pub fn ir<'a>(&mut self, ir: &'a Ir<'a>, custom: &impl Customizer<'a>) -> Result { for func in ir.funcs.values() { - self.func(func)?; + self.func(func, custom)?; } Ok(()) } - pub fn func(&mut self, func: &Func<'_>) -> Result { - let print_reg = |reg: Register| { - display_fn(move |f| match func.regs[reg.0 as usize].name { - None => write!(f, "%{}", reg.0), - Some(name) => write!(f, "%{name}"), - }) - }; + pub fn func<'a>(&mut self, func: &'a Func<'a>, custom: &impl Customizer<'a>) -> Result { + custom.start_func(func); + + let print_reg = + |reg: Register, loc: Location| display_fn(move |f| custom.fmt_reg(reg, f, loc)); write!(self.out, "def {}(", func.name)?; for param in 0..func.arity { @@ -42,7 +66,7 @@ impl PrettyPrinter { self.out, "{} {}", reg.tyl.ty, - print_reg(Register(param as _)) + print_reg(Register(param as _), Location::start()) )?; if (param + 1) != func.arity { write!(self.out, ", ")?; @@ -50,20 +74,25 @@ impl PrettyPrinter { } writeln!(self.out, ") {{",)?; - let print_op = |op: Operand| { + let print_op = |op: Operand, loc: Location| { display_fn(move |f| match op { Operand::Const(c) => Display::fmt(&c, f), - Operand::Reg(reg) => Display::fmt(&print_reg(reg), f), + Operand::Reg(reg) => Display::fmt(&print_reg(reg, loc), f), }) }; for (i, bb) in func.bbs.iter().enumerate() { + let bb_idx = BbIdx::from_usize(i); if i > 0 { writeln!(self.out)?; } - writeln!(self.out, " {}:", BbIdx::from_usize(i))?; + writeln!(self.out, " {}:", bb_idx)?; + + for (stmt_idx, stmt) in bb.statements.iter().enumerate() { + let loc = Location::stmt(bb_idx, stmt_idx); + let print_reg = |reg| print_reg(reg, loc); + let print_op = |op| print_op(op, loc); - for stmt in &bb.statements { match stmt.kind { StatementKind::Alloca { result: reg, @@ -177,14 +206,15 @@ impl PrettyPrinter { }?; } + let loc = Location::terminator(bb_idx); match bb.term { Branch::Goto(bbn) => writeln!(self.out, " goto {}", bbn)?, Branch::Switch { cond, yes, no } => writeln!( self.out, " switch {}, then {yes}, else {no}", - print_op(cond) + print_op(cond, loc) )?, - Branch::Ret(op) => writeln!(self.out, " ret {}", print_op(op))?, + Branch::Ret(op) => writeln!(self.out, " ret {}", print_op(op, loc))?, } } diff --git a/analysis/src/lower.rs b/analysis/src/lower.rs index fd136b5..faaf4a7 100644 --- a/analysis/src/lower.rs +++ b/analysis/src/lower.rs @@ -37,7 +37,9 @@ pub fn lower_translation_unit<'cx>( let (ref declarator, def_span) = decl.init_declarators[0]; - let ast::DirectDeclarator::WithParams { ident, params } = &declarator.declarator.decl else { + let ast::DirectDeclarator::WithParams { ident, params } = + &declarator.declarator.decl + else { unreachable!("function def needs withparams declarator"); }; @@ -144,7 +146,10 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { todo!("complex lvalues") }; let Some(var) = self.resolve_ident(ident) else { - return Err(Error::new(format!("cannot find variable {ident}"), ident_span)); + return Err(Error::new( + format!("cannot find variable {ident}"), + ident_span, + )); }; Ok(match var.kind { VariableInfoKind::Local { ptr_to } => (Operand::Reg(ptr_to), var.tyl), @@ -245,7 +250,10 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> { ast::Expr::Atom(ast::Atom::Float(_)) => todo!("no floats"), ast::Expr::Atom(ast::Atom::Ident((ident, ident_span))) => { let Some(var) = self.resolve_ident(*ident) else { - return Err(Error::new(format!("cannot find variable {ident}"), *ident_span)); + return Err(Error::new( + format!("cannot find variable {ident}"), + *ident_span, + )); }; let tyl = var.tyl; match var.kind { @@ -445,13 +453,7 @@ fn lower_func<'cx>( ) -> Result, Error> { let mut cx = FnLoweringCtxt { scopes: vec![Default::default()], - build: FuncBuilder::new( - name, - def_span, - ret_ty, - lcx, - params.len(), - ), + build: FuncBuilder::new(name, def_span, ret_ty, lcx, params.len()), lcx, }; diff --git a/analysis/src/lower/builder.rs b/analysis/src/lower/builder.rs index 455a390..b5f8ae4 100644 --- a/analysis/src/lower/builder.rs +++ b/analysis/src/lower/builder.rs @@ -166,7 +166,10 @@ impl<'a, 'cx> FuncBuilder<'a, 'cx> { } pub fn finish(self) -> Func<'cx> { - println!("{}", ir::func_to_string(&self.ir)); + println!( + "{}", + ir::func_to_string(&self.ir, &ir::pretty::DefaultCustomizer::default()) + ); self.ir } diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index 3163b19..d92d668 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -1,3 +1,4 @@ +mod registers; mod x86_64; use std::process::Stdio; @@ -18,6 +19,13 @@ pub fn generate<'cx>(lcx: &'cx LoweringCx<'cx>, ir: &Ir<'cx>) -> Result<()> { object::Endianness::Little, ); + // GNU linkers have this awesome thing where they'll mark your stack as executable unless you tell them not to. + obj.add_section( + Vec::new(), + b".note.GNU-stack".to_vec(), + object::SectionKind::Note, + ); + let text = obj.add_section(Vec::new(), b".text".to_vec(), object::SectionKind::Text); for func in ir.funcs.values() { diff --git a/codegen/src/registers.rs b/codegen/src/registers.rs new file mode 100644 index 0000000..dc2d6c4 --- /dev/null +++ b/codegen/src/registers.rs @@ -0,0 +1,122 @@ +//! # Register allocation +//! +//! Register allocation is not very smart, but also not too stupid. It tries to put SSA +//! registers into machine registers as much as possible. +//! +//! ```text +//! bb0: +//! %0 = 0 +//! %1 = 1 +//! %2 = add %0 %1 +//! switch %2, then bb1, else bb2 +//! +//! bb1: +//! %3 = add %1, 1 +//! +//! bb2: +//! %4 = add %2, 2 +//! ``` +//! +//! For all SSA registers, we establish their "point of last use". This is the bb,stmt where their last usage occurs. +//! +//! First, we establish a list of possible registers to allocate. +//! Since we immediately alloca all parameters, all the param registers are free real estate. +//! +//! ```text +//! rax, rdi, rsi, rcx, rdx, r8, r9 +//! ``` +//! +//! This forms our priority list of registers. +//! +//! Every time a statement has a return value, we try to assign that SSA register into a new machine register. +//! For this, we iterate through the register list above and find the first register that's free. If we see a register +//! that is not used anymore at the current location, we throw it out and use that new slot. +//! +//! When codegening an SSA register, we look into a lookup table from SSA register to machine register/stack spill and use that. +//! +//! When the list above is full, we spill the register to the stack. This should be rare. If the register doesn't fit into a machine +//! register, it's also spilled. +//! +//! We do a first pass over the function to calculate all the offsets and registers +//! we want to use. + +use std::cell::Cell; + +use analysis::ir::{self, Func, Location, Register}; +use rustc_hash::FxHashMap; + +/// A machine register from our register list described in the module documentation. +#[derive(Debug, Clone, Copy)] +pub struct MachineReg(pub usize); + +#[derive(Debug, Clone, Copy)] +pub enum RegValue { + /// The SSA register contains an address on the stack. + /// The offset is the offset from the start of the function. + StackRelative { offset: u64 }, + /// The SSA register resides on the stack as it has been spilled. + /// This should be rather rare in practice. + Spilled { offset: u64 }, + /// The SSA register resides in a machine register. + MachineReg(MachineReg), +} + +#[derive(Debug)] +pub struct FunctionLayout { + /// Where a register comes from at a particular usage of a register. + register_uses: FxHashMap<(Location, Register), RegValue>, + total_stack_space: u64, +} + +pub fn compute_layout(f: &Func) -> FunctionLayout { + let register_uses = FxHashMap::default(); + + FunctionLayout { + register_uses, + total_stack_space: 0, + } +} + +pub struct LayoutPrinter<'a>(Cell>>, &'a FunctionLayout); + +impl<'a> ir::pretty::Customizer<'a> for LayoutPrinter<'a> { + fn start_func(&self, func: &'a Func<'a>) { + self.0.set(Some(func)); + } + + fn fmt_reg( + &self, + reg: Register, + f: &mut std::fmt::Formatter<'_>, + loc: Location, + ) -> std::fmt::Result { + let layout = self.1.register_uses.get(&(loc, reg)); + write!(f, "{{")?; + + match self.0.get().unwrap().regs[reg.0 as usize].name { + None => write!(f, "%{}", reg.0)?, + Some(name) => write!(f, "%{name}")?, + } + + write!(f, ", ")?; + match layout { + Some(RegValue::MachineReg(mach)) => write!(f, "reg-{}", mach.0)?, + Some(RegValue::Spilled { offset }) => write!(f, "spill-{offset}")?, + Some(RegValue::StackRelative { offset }) => { + write!(f, "i-forgot-what-this-meant-{offset}")? + } + None => write!(f, "")?, + } + + write!(f, "}}") + } +} + +pub fn debug_layout(func: &Func, layout: &FunctionLayout) { + let custom = LayoutPrinter(Cell::default(), layout); + + println!("----- code layout"); + println!("{}", ir::pretty::func_to_string(func, &custom)); + + dbg!(layout); +} diff --git a/codegen/src/x86_64.rs b/codegen/src/x86_64.rs index 51d6586..6899be9 100644 --- a/codegen/src/x86_64.rs +++ b/codegen/src/x86_64.rs @@ -5,46 +5,6 @@ //! Then, all IR basic blocks and statements are lowered in a straightforward way. //! No optimizations are done. There is some basic register allocation. //! -//! # Register allocation -//! -//! Register allocation is not very smart, but also not too stupid. It tries to put SSA -//! registers into machine registers as much as possible. -//! -//! ```text -//! bb0: -//! %0 = 0 -//! %1 = 1 -//! %2 = add %0 %1 -//! switch %2, then bb1, else bb2 -//! -//! bb1: -//! %3 = add %1, 1 -//! -//! bb2: -//! %4 = add %2, 2 -//! ``` -//! -//! For all SSA registers, we establish their "point of last use". This is the bb,stmt where their last usage occurs. -//! -//! First, we establish a list of possible registers to allocate. -//! Since we immediately alloca all parameters, all the param registers are free real estate. -//! Also, `rbx` is always saved on the stack at the start and end. -//! -//! ```text -//! rax, rbx, rdi, rsi, rcx, rdx, r8, r9 -//! ``` -//! -//! This forms our priority list of registers. -//! -//! Every time a statement has a return value, we try to assign that SSA register into a new machine register. -//! For this, we iterate through the register list above and find the first register that's free. If we see a register -//! that is not used anymore at the current location, we throw it out and use that new slot. -//! -//! When codegening an SSA register, we look into a lookup table from SSA register to machine register/stack spill and use that. -//! -//! When the list above is full, we spill the register to the stack. This should be rare. If the register doesn't fit into a machine -//! register, it's also spilled. -//! //! ## Registers //! //! @@ -78,7 +38,10 @@ use iced_x86::{ use parser::{Error, Span}; use rustc_hash::FxHashMap; -use crate::Result; +use crate::{ + registers::{MachineReg, RegValue}, + Result, +}; trait IcedErrExt { type T; @@ -93,22 +56,6 @@ impl IcedErrExt for Result { } } -/// A machine register from our register list described in the module documentation. -#[derive(Debug, Clone, Copy)] -struct MachineReg(usize); - -#[derive(Debug, Clone, Copy)] -enum RegValue { - /// The SSA register contains an address on the stack. - /// The offest is the offset from the start of the function. - StackRelative { offset: u64 }, - /// The SSA register resides on the stack as it has been spilled. - /// This should be rather rare in practice. - Spilled { offset: u64 }, - /// The SSA register resides in a machine register. - MachineReg(MachineReg), -} - struct AsmCtxt<'cx> { lcx: &'cx LoweringCx<'cx>, a: CodeAssembler, @@ -276,6 +223,9 @@ impl<'cx> AsmCtxt<'cx> { pub fn generate_func<'cx>(lcx: &'cx LoweringCx<'cx>, func: &Func<'cx>) -> Result> { assert_eq!(func.arity, 0, "arguments??? in MY uwucc????"); + let layout = crate::registers::compute_layout(func); + crate::registers::debug_layout(func, &layout); + let fn_sp = func.def_span; let a = CodeAssembler::new(64).sp(fn_sp)?; diff --git a/parser/src/parser/expr.rs b/parser/src/parser/expr.rs index 6192cb5..af6972f 100644 --- a/parser/src/parser/expr.rs +++ b/parser/src/parser/expr.rs @@ -42,7 +42,9 @@ where let r_bp = prefix_binding_power(&Tok::Punct(punct)).ok_or_else(|| { Error::new(format!("expected expression, found {punct}"), span) })?; - let Some(op) = unary_op_from_token(&Tok::Punct(punct)) else { panic!() }; + let Some(op) = unary_op_from_token(&Tok::Punct(punct)) else { + panic!() + }; let rhs = self.expr_bp(r_bp)?; self.next_t()?;