do some registers

This commit is contained in:
nora 2023-07-08 21:07:42 +02:00
parent a363b7c6d1
commit e28469fcc0
9 changed files with 227 additions and 97 deletions

View file

@ -1,6 +1 @@
# uwuc
# Building
`uwucc` requires a nightly rust compiler because of the following features:
* `let_else`

View file

@ -33,7 +33,7 @@
mod custom;
pub mod info;
mod pretty;
pub mod pretty;
mod validate;
mod visit;
@ -299,3 +299,21 @@ impl Branch {
}
}
}
impl Location {
    /// Returns the location of statement `0` in basic block `0`.
    pub fn start() -> Self {
        Self::stmt(BbIdx(0), 0)
    }

    /// Returns the location of the terminator of `bb`, represented by `stmt` being `None`.
    pub fn terminator(bb: BbIdx) -> Self {
        let stmt = None;
        Self { bb, stmt }
    }

    /// Returns the location of the statement at index `stmt` inside `bb`.
    pub fn stmt(bb: BbIdx, stmt: usize) -> Self {
        let stmt = Some(stmt);
        Self { bb, stmt }
    }
}

View file

@ -1,39 +1,63 @@
use std::fmt::{Display, Formatter, Result, Write};
use std::{
cell::Cell,
fmt::{self, Display, Formatter, Result, Write},
};
use super::{BbIdx, BinKind, Branch, ConstValue, Func, Ir, Operand, StatementKind, UnaryKind};
use super::{
BbIdx, BinKind, Branch, ConstValue, Func, Ir, Location, Operand, StatementKind, UnaryKind,
};
use crate::ir::Register;
pub fn ir_to_string(ir: &Ir<'_>) -> String {
pub fn ir_to_string<'a>(ir: &'a Ir<'a>, custom: &impl Customizer<'a>) -> String {
let mut buf = String::new();
PrettyPrinter { out: &mut buf }.ir(ir).unwrap();
PrettyPrinter { out: &mut buf }.ir(ir, custom).unwrap();
buf
}
pub fn func_to_string(func: &Func<'_>) -> String {
pub fn func_to_string<'a>(func: &'a Func<'a>, custom: &impl Customizer<'a>) -> String {
let mut buf = String::new();
PrettyPrinter { out: &mut buf }.func(func).unwrap();
PrettyPrinter { out: &mut buf }.func(func, custom).unwrap();
buf
}
/// Hook that lets the caller of the pretty printer decide how registers are rendered.
pub trait Customizer<'a> {
/// Called by `PrettyPrinter::func` with the function that is about to be printed,
/// before any output for that function is written.
fn start_func(&self, func: &'a Func<'a>);
/// Writes the textual form of `reg` into `f`.
///
/// `loc` is the location at which the register is being printed.
fn fmt_reg(&self, reg: Register, f: &mut fmt::Formatter<'_>, loc: Location) -> fmt::Result;
}
/// [`Customizer`] that prints registers without any extra annotation.
///
/// The function currently being printed is stored by `start_func` so that `fmt_reg`
/// can look up register names in it.
#[derive(Default)]
pub struct DefaultCustomizer<'a>(Cell<Option<&'a Func<'a>>>);

impl<'a> Customizer<'a> for DefaultCustomizer<'a> {
    /// Remembers `func` as the function whose registers are about to be printed.
    fn start_func(&self, func: &'a Func<'a>) {
        self.0.set(Some(func));
    }

    /// Writes `reg` as `%name` when the register has a name, and as `%N` (its number)
    /// otherwise. The location is not used by this customizer.
    ///
    /// # Panics
    ///
    /// Panics if called before `start_func`, or if `reg` is out of bounds for the
    /// function's register list.
    fn fmt_reg(&self, reg: Register, f: &mut fmt::Formatter<'_>, _loc: Location) -> fmt::Result {
        let func = self
            .0
            .get()
            .expect("`start_func` must be called before `fmt_reg`");
        match func.regs[reg.0 as usize].name {
            None => write!(f, "%{}", reg.0),
            Some(name) => write!(f, "%{name}"),
        }
    }
}
/// Writes a textual rendering of the IR into `out`.
pub struct PrettyPrinter<W> {
/// Destination that all output is written to.
out: W,
}
impl<W: Write> PrettyPrinter<W> {
pub fn ir(&mut self, ir: &Ir<'_>) -> Result {
pub fn ir<'a>(&mut self, ir: &'a Ir<'a>, custom: &impl Customizer<'a>) -> Result {
for func in ir.funcs.values() {
self.func(func)?;
self.func(func, custom)?;
}
Ok(())
}
pub fn func(&mut self, func: &Func<'_>) -> Result {
let print_reg = |reg: Register| {
display_fn(move |f| match func.regs[reg.0 as usize].name {
None => write!(f, "%{}", reg.0),
Some(name) => write!(f, "%{name}"),
})
};
pub fn func<'a>(&mut self, func: &'a Func<'a>, custom: &impl Customizer<'a>) -> Result {
custom.start_func(func);
let print_reg =
|reg: Register, loc: Location| display_fn(move |f| custom.fmt_reg(reg, f, loc));
write!(self.out, "def {}(", func.name)?;
for param in 0..func.arity {
@ -42,7 +66,7 @@ impl<W: Write> PrettyPrinter<W> {
self.out,
"{} {}",
reg.tyl.ty,
print_reg(Register(param as _))
print_reg(Register(param as _), Location::start())
)?;
if (param + 1) != func.arity {
write!(self.out, ", ")?;
@ -50,20 +74,25 @@ impl<W: Write> PrettyPrinter<W> {
}
writeln!(self.out, ") {{",)?;
let print_op = |op: Operand| {
let print_op = |op: Operand, loc: Location| {
display_fn(move |f| match op {
Operand::Const(c) => Display::fmt(&c, f),
Operand::Reg(reg) => Display::fmt(&print_reg(reg), f),
Operand::Reg(reg) => Display::fmt(&print_reg(reg, loc), f),
})
};
for (i, bb) in func.bbs.iter().enumerate() {
let bb_idx = BbIdx::from_usize(i);
if i > 0 {
writeln!(self.out)?;
}
writeln!(self.out, " {}:", BbIdx::from_usize(i))?;
writeln!(self.out, " {}:", bb_idx)?;
for (stmt_idx, stmt) in bb.statements.iter().enumerate() {
let loc = Location::stmt(bb_idx, stmt_idx);
let print_reg = |reg| print_reg(reg, loc);
let print_op = |op| print_op(op, loc);
for stmt in &bb.statements {
match stmt.kind {
StatementKind::Alloca {
result: reg,
@ -177,14 +206,15 @@ impl<W: Write> PrettyPrinter<W> {
}?;
}
let loc = Location::terminator(bb_idx);
match bb.term {
Branch::Goto(bbn) => writeln!(self.out, " goto {}", bbn)?,
Branch::Switch { cond, yes, no } => writeln!(
self.out,
" switch {}, then {yes}, else {no}",
print_op(cond)
print_op(cond, loc)
)?,
Branch::Ret(op) => writeln!(self.out, " ret {}", print_op(op))?,
Branch::Ret(op) => writeln!(self.out, " ret {}", print_op(op, loc))?,
}
}

View file

@ -37,7 +37,9 @@ pub fn lower_translation_unit<'cx>(
let (ref declarator, def_span) = decl.init_declarators[0];
let ast::DirectDeclarator::WithParams { ident, params } = &declarator.declarator.decl else {
let ast::DirectDeclarator::WithParams { ident, params } =
&declarator.declarator.decl
else {
unreachable!("function def needs withparams declarator");
};
@ -144,7 +146,10 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> {
todo!("complex lvalues")
};
let Some(var) = self.resolve_ident(ident) else {
return Err(Error::new(format!("cannot find variable {ident}"), ident_span));
return Err(Error::new(
format!("cannot find variable {ident}"),
ident_span,
));
};
Ok(match var.kind {
VariableInfoKind::Local { ptr_to } => (Operand::Reg(ptr_to), var.tyl),
@ -245,7 +250,10 @@ impl<'a, 'cx> FnLoweringCtxt<'a, 'cx> {
ast::Expr::Atom(ast::Atom::Float(_)) => todo!("no floats"),
ast::Expr::Atom(ast::Atom::Ident((ident, ident_span))) => {
let Some(var) = self.resolve_ident(*ident) else {
return Err(Error::new(format!("cannot find variable {ident}"), *ident_span));
return Err(Error::new(
format!("cannot find variable {ident}"),
*ident_span,
));
};
let tyl = var.tyl;
match var.kind {
@ -445,13 +453,7 @@ fn lower_func<'cx>(
) -> Result<Func<'cx>, Error> {
let mut cx = FnLoweringCtxt {
scopes: vec![Default::default()],
build: FuncBuilder::new(
name,
def_span,
ret_ty,
lcx,
params.len(),
),
build: FuncBuilder::new(name, def_span, ret_ty, lcx, params.len()),
lcx,
};

View file

@ -166,7 +166,10 @@ impl<'a, 'cx> FuncBuilder<'a, 'cx> {
}
pub fn finish(self) -> Func<'cx> {
println!("{}", ir::func_to_string(&self.ir));
println!(
"{}",
ir::func_to_string(&self.ir, &ir::pretty::DefaultCustomizer::default())
);
self.ir
}

View file

@ -1,3 +1,4 @@
mod registers;
mod x86_64;
use std::process::Stdio;
@ -18,6 +19,13 @@ pub fn generate<'cx>(lcx: &'cx LoweringCx<'cx>, ir: &Ir<'cx>) -> Result<()> {
object::Endianness::Little,
);
// GNU linkers have this awesome thing where they'll mark your stack as executable unless you tell them not to.
obj.add_section(
Vec::new(),
b".note.GNU-stack".to_vec(),
object::SectionKind::Note,
);
let text = obj.add_section(Vec::new(), b".text".to_vec(), object::SectionKind::Text);
for func in ir.funcs.values() {

122
codegen/src/registers.rs Normal file
View file

@ -0,0 +1,122 @@
//! # Register allocation
//!
//! Register allocation is not very smart, but also not too stupid. It tries to put SSA
//! registers into machine registers as much as possible.
//!
//! ```text
//! bb0:
//! %0 = 0
//! %1 = 1
//! %2 = add %0, %1
//! switch %2, then bb1, else bb2
//!
//! bb1:
//! %3 = add %1, 1
//!
//! bb2:
//! %4 = add %2, 2
//! ```
//!
//! For all SSA registers, we establish their "point of last use". This is the bb,stmt where their last usage occurs.
//!
//! First, we establish a list of possible registers to allocate.
//! Since we immediately alloca all parameters, all the param registers are free real estate.
//!
//! ```text
//! rax, rdi, rsi, rcx, rdx, r8, r9
//! ```
//!
//! This forms our priority list of registers.
//!
//! Every time a statement has a return value, we try to assign that SSA register into a new machine register.
//! For this, we iterate through the register list above and find the first register that's free. If a machine
//! register holds an SSA register that is not used anymore at the current location, we evict it and reuse that slot.
//!
//! When generating code for an SSA register, we look into a lookup table from SSA register to machine register/stack spill and use that.
//!
//! When the list above is full, we spill the register to the stack. This should be rare. If the register doesn't fit into a machine
//! register, it's also spilled.
//!
//! We do a first pass over the function to calculate all the offsets and registers
//! we want to use.
use std::cell::Cell;
use analysis::ir::{self, Func, Location, Register};
use rustc_hash::FxHashMap;
/// A machine register from our register list described in the module documentation.
// NOTE(review): the wrapped `usize` is presumably an index into that register list — confirm.
#[derive(Debug, Clone, Copy)]
pub struct MachineReg(pub usize);
/// Where the value of an SSA register can be found, as recorded in [`FunctionLayout`].
#[derive(Debug, Clone, Copy)]
pub enum RegValue {
/// The SSA register contains an address on the stack.
/// The offset is the offset from the start of the function.
StackRelative { offset: u64 },
/// The SSA register resides on the stack as it has been spilled.
/// This should be rather rare in practice.
Spilled { offset: u64 },
/// The SSA register resides in a machine register.
MachineReg(MachineReg),
}
/// The result of [`compute_layout`] for a single function.
#[derive(Debug)]
pub struct FunctionLayout {
/// Where a register comes from at a particular usage of a register.
register_uses: FxHashMap<(Location, Register), RegValue>,
/// Presumably the amount of stack space the function needs — TODO confirm the unit.
total_stack_space: u64,
}
/// Computes the register/stack layout for `f`.
///
/// This is currently a placeholder: `f` is not inspected yet, and the returned layout
/// has no recorded register uses and a total stack space of zero.
pub fn compute_layout(f: &Func) -> FunctionLayout {
    FunctionLayout {
        register_uses: FxHashMap::default(),
        total_stack_space: 0,
    }
}
/// [`ir::pretty::Customizer`] that annotates every printed register with its entry in a
/// [`FunctionLayout`].
///
/// Field `0` holds the function currently being printed (set by `start_func`); field `1`
/// is the layout that is consulted for each register use.
pub struct LayoutPrinter<'a>(Cell<Option<&'a Func<'a>>>, &'a FunctionLayout);

impl<'a> ir::pretty::Customizer<'a> for LayoutPrinter<'a> {
    /// Remembers `func` as the function whose registers are about to be printed.
    fn start_func(&self, func: &'a Func<'a>) {
        self.0.set(Some(func));
    }

    /// Writes `reg` as `{<register>, <placement>}`.
    ///
    /// `<register>` is `%name` for named registers and `%N` otherwise; `<placement>`
    /// describes the layout entry recorded for `(loc, reg)`, or `<unknown>` when the
    /// layout has no such entry.
    fn fmt_reg(
        &self,
        reg: Register,
        f: &mut std::fmt::Formatter<'_>,
        loc: Location,
    ) -> std::fmt::Result {
        let placement = self.1.register_uses.get(&(loc, reg));

        write!(f, "{{")?;

        // Register part: prefer the source-level name, fall back to the register number.
        let current_func = self.0.get().unwrap();
        if let Some(name) = current_func.regs[reg.0 as usize].name {
            write!(f, "%{name}")?;
        } else {
            write!(f, "%{}", reg.0)?;
        }

        write!(f, ", ")?;

        // Placement part: what the layout recorded for this use, if anything.
        match placement {
            None => write!(f, "<unknown>")?,
            Some(RegValue::StackRelative { offset }) => {
                write!(f, "i-forgot-what-this-meant-{offset}")?
            }
            Some(RegValue::Spilled { offset }) => write!(f, "spill-{offset}")?,
            Some(RegValue::MachineReg(mach)) => write!(f, "reg-{}", mach.0)?,
        }

        write!(f, "}}")
    }
}
/// Prints the IR of `func`, with every register annotated from `layout`, to stdout,
/// then dumps `layout` itself with `dbg!`.
pub fn debug_layout(func: &Func, layout: &FunctionLayout) {
    println!("----- code layout");

    let printer = LayoutPrinter(Cell::default(), layout);
    let rendered = ir::pretty::func_to_string(func, &printer);
    println!("{}", rendered);

    dbg!(layout);
}

View file

@ -5,46 +5,6 @@
//! Then, all IR basic blocks and statements are lowered in a straightforward way.
//! No optimizations are done. There is some basic register allocation.
//!
//! # Register allocation
//!
//! Register allocation is not very smart, but also not too stupid. It tries to put SSA
//! registers into machine registers as much as possible.
//!
//! ```text
//! bb0:
//! %0 = 0
//! %1 = 1
//! %2 = add %0 %1
//! switch %2, then bb1, else bb2
//!
//! bb1:
//! %3 = add %1, 1
//!
//! bb2:
//! %4 = add %2, 2
//! ```
//!
//! For all SSA registers, we establish their "point of last use". This is the bb,stmt where their last usage occurs.
//!
//! First, we establish a list of possible registers to allocate.
//! Since we immediately alloca all parameters, all the param registers are free real estate.
//! Also, `rbx` is always saved on the stack at the start and end.
//!
//! ```text
//! rax, rbx, rdi, rsi, rcx, rdx, r8, r9
//! ```
//!
//! This forms our priority list of registers.
//!
//! Every time a statement has a return value, we try to assign that SSA register into a new machine register.
//! For this, we iterate through the register list above and find the first register that's free. If we see a register
//! that is not used anymore at the current location, we throw it out and use that new slot.
//!
//! When codegening an SSA register, we look into a lookup table from SSA register to machine register/stack spill and use that.
//!
//! When the list above is full, we spill the register to the stack. This should be rare. If the register doesn't fit into a machine
//! register, it's also spilled.
//!
//! ## Registers
//! <https://gitlab.com/x86-psABIs/x86-64-ABI>
//!
@ -78,7 +38,10 @@ use iced_x86::{
use parser::{Error, Span};
use rustc_hash::FxHashMap;
use crate::Result;
use crate::{
registers::{MachineReg, RegValue},
Result,
};
trait IcedErrExt {
type T;
@ -93,22 +56,6 @@ impl<T> IcedErrExt for Result<T, IcedError> {
}
}
/// A machine register from our register list described in the module documentation.
#[derive(Debug, Clone, Copy)]
struct MachineReg(usize);
#[derive(Debug, Clone, Copy)]
enum RegValue {
/// The SSA register contains an address on the stack.
/// The offest is the offset from the start of the function.
StackRelative { offset: u64 },
/// The SSA register resides on the stack as it has been spilled.
/// This should be rather rare in practice.
Spilled { offset: u64 },
/// The SSA register resides in a machine register.
MachineReg(MachineReg),
}
struct AsmCtxt<'cx> {
lcx: &'cx LoweringCx<'cx>,
a: CodeAssembler,
@ -276,6 +223,9 @@ impl<'cx> AsmCtxt<'cx> {
pub fn generate_func<'cx>(lcx: &'cx LoweringCx<'cx>, func: &Func<'cx>) -> Result<Vec<u8>> {
assert_eq!(func.arity, 0, "arguments??? in MY uwucc????");
let layout = crate::registers::compute_layout(func);
crate::registers::debug_layout(func, &layout);
let fn_sp = func.def_span;
let a = CodeAssembler::new(64).sp(fn_sp)?;

View file

@ -42,7 +42,9 @@ where
let r_bp = prefix_binding_power(&Tok::Punct(punct)).ok_or_else(|| {
Error::new(format!("expected expression, found {punct}"), span)
})?;
let Some(op) = unary_op_from_token(&Tok::Punct(punct)) else { panic!() };
let Some(op) = unary_op_from_token(&Tok::Punct(punct)) else {
panic!()
};
let rhs = self.expr_bp(r_bp)?;
self.next_t()?;