diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6ce8b08
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+a.out
+*.o
+.vscode
diff --git a/dump-main.gdb b/dump-main.gdb
new file mode 100644
index 0000000..2994383
--- /dev/null
+++ b/dump-main.gdb
@@ -0,0 +1,3 @@
+set disassembly-flavor intel
+set style enabled on
+disas main
diff --git a/index.js b/index.js
index a7dce2a..d25511e 100644
--- a/index.js
+++ b/index.js
@@ -1,14 +1,16 @@
+import { spawn } from "node:child_process";
 import fs from "node:fs/promises";
 
 // https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf
 // yep.
 
+const BUFFER_LE = true;
+
 class CompilerError extends Error {
   constructor(message, span) {
     super(message);
-    if (!span) {
-      throw new Error("span must be present");
-    }
+    assertDefined(message);
+    assertDefined(span);
 
     this.span = span;
   }
@@ -33,8 +35,16 @@ class CompilerError extends Error {
 }
 
 function lex(input) {
-  function alphabetic(char) {
-    return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z");
+  // 6.4.2 Identifiers
+  function identifierStart(char) {
+    return (
+      (char >= "a" && char <= "z") ||
+      (char >= "A" && char <= "Z") ||
+      char === "_"
+    );
+  }
+  function identifierCont(char) {
+    return identifierStart(char) || (char >= "0" && char <= "9");
   }
 
   const tokens = [];
@@ -63,10 +73,10 @@ function lex(input) {
         integer: Number(number),
         span,
       });
-    } else if (alphabetic(head)) {
+    } else if (identifierStart(head)) {
       const span = i - 1;
       let ident = head;
-      while (alphabetic(input[i])) {
+      while (identifierCont(input[i])) {
         ident += input[i];
         i++;
       }
@@ -95,7 +105,10 @@ function lex(input) {
         case "{":
         case "}":
         case ",":
-        case ";": {
+        case ";":
+        case "+":
+        case "-":
+        case "=": {
           tokens.push({
             kind: head,
             span: i - 1,
@@ -250,8 +263,23 @@ function parse(tokens) {
     return lhs;
   }
 
-  const parseLogicalAnd = generateBinaryParser(["&&"], parsePostfix);
+  const parseMultiplicative = generateBinaryParser(
+    ["*", "/", "%"],
+    parsePostfix
+  );
+  const parseAdditive = generateBinaryParser(["+", "-"], parseMultiplicative);
+  const parseShift = generateBinaryParser(["<<", ">>"], parseAdditive);
+  const parseRelational = generateBinaryParser(
+    ["<", ">", "<=", ">="],
+    parseShift
+  );
+  const parseEquality = generateBinaryParser(["==", "!="], parseRelational);
+  const parseAnd = generateBinaryParser(["&"], parseEquality);
+  const parseExclusiveOr = generateBinaryParser(["^"], parseAnd);
+  const parseInclusiveOr = generateBinaryParser(["|"], parseExclusiveOr);
+  const parseLogicalAnd = generateBinaryParser(["&&"], parseInclusiveOr);
   const parseLogicalOr = generateBinaryParser(["||"], parseLogicalAnd);
+  // TODO conditional operator
   const parseAssignment = generateBinaryParser(
     ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", , "^=", "|="],
     parseLogicalOr
@@ -261,19 +289,65 @@ function parse(tokens) {
     return parseComma(tok);
   }
 
+  function parseStatement(tok) {
+    switch (tok.peek()?.kind) {
+      case "ident": {
+        switch (tok.peek()?.ident) {
+          case "return": {
+            const span = tok.next("return").span;
+            let rhs = undefined;
+            if (tok.peek()?.kind !== ";") {
+              rhs = parseExpr(tok);
+            }
+
+            return {
+              kind: "return",
+              rhs,
+              span,
+            };
+          }
+          case "int": {
+            const typeSpan = tok.next("ident").span;
+            const name = tok.next("ident");
+
+            tok.next("=");
+
+            let init;
+            if (tok.peek()?.kind !== ";") {
+              init = parseExpr(tok);
+            }
+
+            return {
+              kind: "declaration",
+              type: "int",
+              name: name.ident,
+              span: typeSpan,
+              init,
+            };
+          }
+          default: {
+            // fallthrough
+          }
+        }
+      }
+      default: {
+        const expr = parseExpr(tok);
+        return {
+          kind: "expr",
+          expr,
+          span: expr.span,
+        };
+      }
+    }
+  }
+
   function parseBlock(tok) {
     tok.expect("{", "start of block");
 
     const statements = [];
 
     while (tok.peek()?.kind !== "}") {
-      // TODO: non-expression statements
-      const expr = parseExpr(tok);
-      statements.push({
-        kind: "expr",
-        expr,
-        span: expr.span,
-      });
+      statements.push(parseStatement(tok));
       tok.expect(";", "end of statement");
     }
 
@@ -291,6 +365,9 @@ function parse(tokens) {
 
     const params = [];
     while (tok.peek()?.kind !== ")") {
+      if (params.length > 0) {
+        tok.expect(",", "function parameter separator");
+      }
       const type = parseType(tok, "function parameter");
       const name = tok.next("function name");
       if (name.kind !== "ident") {
@@ -311,8 +388,9 @@ function parse(tokens) {
 
     return {
       kind: "function",
-      ret,
       name,
+      params,
+      ret,
       body,
     };
   }
@@ -331,14 +409,20 @@ function lower(ast) {
   x86-64 codegen.
 
   strategy: every expression returns its result in rax.
+
+  useful:
+  - https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html
+  - https://gitlab.com/x86-psABIs/x86-64-ABI
   */
 
   function littleEndian16(number) {
     assertDefined(number);
+    assert(typeof number === "number");
     return [number & 0xff, (number >> 8) & 0xff];
   }
   function littleEndian32(number) {
     assertDefined(number);
+    assert(typeof number === "number");
     return [
       number & 0xff,
       (number >> 8) & 0xff,
@@ -346,42 +430,241 @@ function lower(ast) {
       (number >> 24) & 0xff,
     ];
   }
+  function littleEndian64(number) {
+    assertDefined(number);
+    assert(number <= 0xff_ff_ff_ff);
+    assert(typeof number === "number");
+    return [...littleEndian32(number), 0, 0, 0, 0];
+  }
+  function signedLittleEndian64(number) {
+    assertDefined(number);
+    assert(number <= 0xff_ff_ff_ff);
+    assert(number >= -(0xff_ff_ff_ff + 1));
+    assert(typeof number === "number");
+    const array = littleEndian64(number);
+    const signBit = array[3] & 0b10000000;
+    if (signBit) {
+      array[4] = 0xff;
+      array[5] = 0xff;
+      array[6] = 0xff;
+      array[7] = 0xff;
+    }
+    return array;
+  }
+
+  const RELOCATIONS = {
+    R_X86_64_PC32: 2,
+    R_X86_64_PLT32: 4,
+  };
+  const SYMBOL_TYPES = {
+    STT_NOTYPE: 0,
+    STT_FUNC: 2,
+  };
+  const SYMBOL_BINDING = {
+    STB_LOCAL: 0,
+    STB_GLOBAL: 1,
+  };
+  const SYMBOL_VISIBILITY = {
+    STV_DEFAULT: 0,
+  };
+
+  // 2.1.3 ModR/M and SIB Bytes
 
   const MOD_REG = 0b11;
   const RM_A = 0b000;
   const RM_C = 0b001;
+  const RM_D = 0b010;
+  const RM_B = 0b011;
+  const RM_SP = 0b100;
+  const RM_BP = 0b101;
+  const RM_SI = 0b110;
+  const RM_DI = 0b111;
 
   const REG_A = RM_A;
   const REG_C = RM_C;
+  const REG_D = RM_D;
+  const REG_B = RM_B;
+  const REG_SP = RM_SP;
+  const REG_BP = RM_BP;
+  const REG_SI = RM_SI;
+  const REG_DI = RM_DI;
+
+  const PARAM_CALLCONV_REGISTERS = [REG_DI, REG_SI, REG_D, REG_C];
 
   const REG_IGNORED = 0;
+  function modRm(mod, reg, rm) {
+    assert(mod <= 0b11);
+    assert(rm <= 0b111);
+    assert(reg <= 0b111);
+    return (mod << 6) | (reg << 3) | rm;
+  }
 
-  function modRm(mod, rm, reg) {
-    return (mod >> 6) | rm | (reg << 3);
+  const REX = {
+    W_OPERAND_SIZE_DETERMINED: 0,
+    W_64_BIT_OPERAND_SIZE: 1,
+  };
+
+  function rex(w, r, x, b) {
+    assert(w <= 1);
+    assert(r <= 1);
+    assert(x <= 1);
+    assert(b <= 1);
+    return 0b0100_0000 | (w << 3) | (r << 2) | (x << 1) | b;
   }
 
   class InstBuilder {
+    /**
+     * The reserved stack space for locals and intermediary values.
+     * We subtract rsp by this value in the beginning.
+     */
     #stackSize;
+    #patches;
 
     constructor() {
       this.out = new Uint8Array();
+      this.relocations = [];
       this.#stackSize = 0;
+      this.#patches = [];
+
+      this.#prologue();
+    }
+
+    #prologue() {
+      // push rbp
+      this.pushReg64(REG_BP); // push british petroleum
+      // mov rbp, rsp
+      this.movRegReg64(REG_BP, REG_SP);
+      // sub rsp, SIZE
+      this.subImm(REG_SP, 0);
+      this.#patches.push({
+        start: this.out.length - 4,
+        patch: () => littleEndian32(this.#stackSize),
+      });
+    }
+
+    #epilogue() {
+      // mov rsp, rbp
+      this.movRegReg64(REG_SP, REG_BP);
+      // pop rbp
+      this.popReg64(REG_BP);
+    }
+
+    finish() {
+      this.#epilogue();
+      this.#patches.forEach((patch) => {
+        const result = patch.patch();
+        assert(Array.isArray(result));
+        result.forEach((v, i) => {
+          this.out[patch.start + i] = v;
+        });
+      });
     }
 
     reserveStack(size) {
+      const prev = this.#stackSize;
       this.#stackSize += size;
-      return this.#stackSize;
+      return prev;
+    }
+
+    addRegs(reg1, reg2) {
+      // REX.W + 03 /r | ADD r64, r/m64 ; Add r/m64 to r64
+      this.#append([
+        rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0),
+        0x03,
+        modRm(MOD_REG, reg1, reg2),
+      ]);
+    }
+
+    subRegs(reg1, reg2) {
+      // REX.W + 2B /r | SUB r64, r/m64 ; Subtract r/m64 from r64
+      this.#append([
+        rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0),
+        0x2b,
+        modRm(MOD_REG, reg1, reg2),
+      ]);
+    }
+
+    movRegToStackOffset(offset, reg) {
+      // mov [rsp+{offset}], reg
+      // 89 /r, MOV r/m32, r32 (no REX.W prefix is emitted, so this is a 32-bit store)
+      this.#append([
+        0x89,
+        // [--][--]+disp32
+        modRm(0b10, reg, 0b100),
+        0x24, // SIB: Scaled index: none, Base: ESP
+        ...littleEndian32(offset),
+      ]);
+    }
+
+    movStackOffsetToReg(offset, reg) {
+      // mov reg, [rsp+{offset}]
+      // 8B /r, MOV r32, r/m32 (no REX.W prefix is emitted, so this is a 32-bit load)
+      this.#append([
+        0x8b,
+        // [--][--]+disp32
+        modRm(0b10, reg, 0b100),
+        0x24, // SIB: Scaled index: none, Base: ESP
+        ...littleEndian32(offset),
+      ]);
     }
 
     movEaxImm32(imm) {
       // mov eax, imm
       this.#append([
-        0xC7,
-        modRm(MOD_REG, RM_A, REG_IGNORED),
+        0xc7,
+        modRm(MOD_REG, REG_IGNORED, RM_A),
         ...littleEndian32(imm),
       ]);
     }
 
+    movRegReg32(to, from) {
+      // ; Move r/m32 to r32
+      this.#append([0x8b, modRm(MOD_REG, to, from)]);
+    }
+
+    movRegReg64(to, from) {
+      // ; Move r/m64 to r64.
+      this.#append([
+        rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0),
+        0x8b,
+        modRm(MOD_REG, to, from),
+      ]);
+    }
+
+    pushReg64(reg) {
+      // 50+rd ; Push r64.
+      this.#append([0x50 | reg]);
+    }
+
+    popReg64(reg) {
+      // 58+rd ; Pop top of stack into r64; increment stack pointer.
+      this.#append([0x58 | reg]);
+    }
+
+    subImm(reg, imm) {
+      // REX.W + 81 /5 id ; Subtract imm32 sign-extended to 64-bits from r/m64.
+      this.#append([
+        rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0),
+        0x81,
+        modRm(MOD_REG, 5 /* /5*/, reg),
+        ...littleEndian32(imm),
+      ]);
+    }
+
+    call(symbol) {
+      // call rel32 ; Call near, relative, displacement relative to next
+      //            ; instruction. 32-bit displacement sign extended to
+      //            ; 64-bits in 64-bit mode
+      this.#append([0xe8]);
+      this.relocations.push({
+        kind: RELOCATIONS.R_X86_64_PC32, // pietro said i should use this even though PC_32 works too
+        symbol,
+        offset: this.out.length,
+        addend: -4,
+      });
+      this.#append([0x0, 0x0, 0x0, 0x0]);
+    }
+
     ret() {
       // ret ; near return to calling prodecude
       this.#append([0xc3]);
@@ -392,7 +675,9 @@ function lower(ast) {
     }
   }
 
-  function codegenExpr(ib, expr) {
+  function codegenExpr(ctx, expr) {
+    assert(!Number.isNaN(ctx.offset));
+    const { ib, variables } = ctx;
     switch (expr.kind) {
       case "call": {
         if (expr.lhs.kind !== "ident") {
@@ -402,7 +687,11 @@ function lower(ast) {
           throw new Error("bad");
         }
 
-        const arg0 = codegenExpr(ib, expr.args[0]);
+        // TODO: save
+        codegenExpr(ctx, expr.args[0]);
+        // mov edi, eax
+        ib.movRegReg32(REG_DI, REG_A);
+        ib.call(expr.lhs.string);
 
         break;
       }
@@ -410,7 +699,33 @@ function lower(ast) {
         ib.movEaxImm32(expr.integer);
         break;
       }
+      case "ident": {
+        const offset = [...variables]
+          .reverse()
+          .find((v) => v.name === expr.string);
+        assert(offset);
+        ib.movStackOffsetToReg(offset.stackOffset + ctx.offset, REG_A);
+        break;
+      }
       case "+": {
+        // For binary expressions, we first evaluate the LHS, save it on the stack,
+        // evaluate the RHS, then restore the LHS and perform the operation.
+        codegenExpr(ctx, expr.lhs);
+        ib.pushReg64(REG_A); // push rax
+        codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.rhs);
+        ib.popReg64(REG_C); // pop rcx
+
+        ib.addRegs(REG_A, REG_C);
+        break;
+      }
+      case "-": {
+        codegenExpr(ctx, expr.rhs);
+        ib.pushReg64(REG_A); // push rax
+        codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.lhs);
+        ib.popReg64(REG_C); // pop rcx
+
+        ib.subRegs(REG_A, REG_C);
+        break;
       }
       default: {
         throw new Error(`unsupported expr: ${expr.kind}`);
@@ -420,54 +735,194 @@ function lower(ast) {
 
   function codegenFunction(func) {
     const ib = new InstBuilder();
+    const variables = [];
+
+    assert(func.params.length <= 4);
+
+    func.params.forEach((param, i) => {
+      assert(param.type.kind === "int");
+      const offset = ib.reserveStack(4);
+      ib.movRegToStackOffset(offset, PARAM_CALLCONV_REGISTERS[i]);
+      variables.push({
+        name: param.name,
+        stackOffset: offset,
+      });
+    });
 
     for (const stmt of func.body) {
-      console.log("doing cg for", stmt.kind);
-
+      const ctx = { ib, variables, offset: 0 };
       switch (stmt.kind) {
         case "expr": {
-          codegenExpr(ib, stmt.expr);
+          codegenExpr(ctx, stmt.expr);
+          break;
+        }
+        case "return": {
+          if (stmt.rhs) {
+            codegenExpr(ctx, stmt.rhs);
+          }
+          ib.finish();
+          ib.ret();
+          break;
+        }
+        case "declaration": {
+          assert(stmt.type === "int");
+          if (stmt.init) {
+            codegenExpr(ctx, stmt.init);
+          }
+          const slot = ib.reserveStack(4);
+          variables.push({
+            name: stmt.name,
+            stackOffset: slot,
+          });
+          // mov [rsp+{slot}], eax
+          ib.movRegToStackOffset(slot, REG_A);
           break;
         }
         default: {
-          throw new Error(`unsupported stmt: ${stmt.kind}`);
+          assert(false);
         }
       }
     }
 
     ib.movEaxImm32(0);
+    ib.finish();
     ib.ret();
 
     return ib;
   }
 
   class BufferBuilder {
+    #buffer;
     constructor() {
-      this.buffer = new Uint8Array();
+      this.#buffer = new ArrayBuffer(0, { maxByteLength: 2 ** 32 });
+    }
+    #appendUintGeneric(size, name, int) {
+      assertDefined(int);
+      assert(typeof int === "number" || typeof int === "bigint");
+
+      const oldLength = this.length;
+      this.#buffer.resize(oldLength + size);
+      new DataView(this.#buffer)[name](oldLength, int, BUFFER_LE);
+    }
+    appendUint8(int) {
+      this.#appendUintGeneric(1, "setUint8", int);
+    }
+    appendUint16Le(int) {
+      this.#appendUintGeneric(2, "setUint16", int);
+    }
+    appendUint32Le(int) {
+      this.#appendUintGeneric(4, "setUint32", int);
+    }
+    appendUint64Le(int) {
+      assert(typeof int === "number");
+      this.#appendUintGeneric(8, "setBigUint64", BigInt(int));
     }
     append(array) {
       assertDefined(array);
-      this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]);
+      array.forEach((elem) => assert(typeof elem === "number"));
+
+      const oldLength = this.length;
+      const newLength = oldLength + array.length;
+      this.#buffer.resize(newLength);
+
+      for (let i = 0; i < array.length; i++) {
+        new DataView(this.#buffer).setUint8(oldLength + i, array[i]);
+      }
+    }
+    get length() {
+      return this.#buffer.byteLength;
     }
     get currentPos() {
-      return this.buffer.length;
+      return this.length;
+    }
+    writeUint16Le(offset, int) {
+      assertDefined(offset, int);
+      new DataView(this.#buffer).setUint16(offset, int, BUFFER_LE);
+    }
+    writeUint32Le(offset, int) {
+      assertDefined(offset, int);
+      new DataView(this.#buffer).setUint32(offset, int, BUFFER_LE);
+    }
+    toUint8Array() {
+      return new Uint8Array(this.#buffer);
     }
   }
 
   function generateObjectFile(funcs) {
-    if (funcs.length !== 1) {
-      throw new Error("bad");
+    const alignTo = (out, align) => {
+      assertDefined(out, align);
+      const missing = out.length % align;
+      if (missing === 0) {
+        return;
+      }
+      const up = align - missing;
+      out.append(Array(up).fill(0));
+    };
+
+    function layoutFuncs(funcs) {
+      const textContent = new BufferBuilder();
+
+      const textRelativeSymbols = [];
+      const relocations = [];
+
+      funcs.forEach((func) => {
+        alignTo(textContent, 8); // i think this is not actually necessary.
+        const offset = textContent.length;
+        textRelativeSymbols.push({
+          name: func.name,
+          offset,
+          size: func.code.length,
+        });
+        relocations.push(
+          ...func.relocations.map((relocation) => ({
+            kind: relocation.kind,
+            symbol: relocation.symbol,
+            addend: relocation.addend,
+            offset: offset + relocation.offset,
+          }))
+        );
+        textContent.append(func.code);
+      });
+
+      return {
+        textContent: textContent.toUint8Array(),
+        textRelativeSymbols,
+        relocations,
+      };
     }
 
-    const textContent = funcs[0].code;
-    const textRelativeSymbols = [
+    const symbols = [
       {
-        name: funcs[0].name,
-        offset: 0,
-        size: funcs[0].code.length,
+        name: "",
+        type: SYMBOL_TYPES.STT_NOTYPE,
+        binding: SYMBOL_BINDING.STB_LOCAL,
+        visibility: SYMBOL_VISIBILITY.STV_DEFAULT,
+        sectionIndex: 0,
+        value: 0,
+        size: 0,
       },
     ];
 
+    const {
+      textContent,
+      textRelativeSymbols,
+      relocations: funcRelocations,
+    } = layoutFuncs(funcs);
+
+    for (const sym of textRelativeSymbols) {
+      symbols.push({
+        name: sym.name,
+        type: SYMBOL_TYPES.STT_FUNC,
+        binding: SYMBOL_BINDING.STB_GLOBAL,
+        visibility: SYMBOL_VISIBILITY.STV_DEFAULT,
+        sectionIndex: 1 /*.text*/,
+        value: sym.offset,
+        size: sym.size,
+      });
+    }
+
+    console.log("relocations", funcRelocations);
+
     let out = new BufferBuilder();
     // ident
     out.append([0x7f, "E".charCodeAt(0), "L".charCodeAt(0), "F".charCodeAt(0)]);
@@ -513,11 +968,7 @@ function lower(ast) {
 
     // Let's write some section headers.
 
-    const shoff = littleEndian32(out.currentPos);
-    out.buffer[shoffRef] = shoff[0];
-    out.buffer[shoffRef + 1] = shoff[1];
-    out.buffer[shoffRef + 2] = shoff[2];
-    out.buffer[shoffRef + 3] = shoff[3];
+    out.writeUint32Le(shoffRef, out.currentPos);
 
     class NullTerminatedStringStore {
       #offsets;
@@ -529,7 +980,7 @@ function lower(ast) {
         if (this.#offsets.has(str)) {
           return this.#offsets.get(str);
         }
-        const offset = this.out.buffer.length;
+        const offset = this.out.length;
         this.#offsets.set(str, offset);
         this.out.append(new TextEncoder("utf-8").encode(str));
         this.out.append([0]);
@@ -547,27 +998,17 @@ function lower(ast) {
     const writeSectionHeader = (name, sh) => {
       sectionCount++;
       const nameIndex = shstrs.pushAndGet(name);
-      out.append([
-        ...littleEndian32(nameIndex),
-        ...littleEndian32(sh.type),
-        ...littleEndian32(sh.flags),
-        ...[0, 0, 0, 0], // flag pad
-        ...littleEndian32(sh.addr),
-        ...[0, 0, 0, 0],
-      ]);
+      out.appendUint32Le(nameIndex);
+      out.appendUint32Le(sh.type);
+      out.appendUint64Le(sh.flags);
+      out.appendUint64Le(sh.addr);
       sectionOffsetRefs[name] = out.currentPos;
-      out.append([
-        ...littleEndian32(sh.offset),
-        ...[0, 0, 0, 0],
-        ...littleEndian32(sh.size),
-        ...[0, 0, 0, 0],
-        ...littleEndian32(sh.link),
-        ...littleEndian32(sh.info),
-        ...littleEndian32(sh.addralign),
-        ...[0, 0, 0, 0],
-        ...littleEndian32(sh.entsize),
-        ...[0, 0, 0, 0],
-      ]);
+      out.appendUint64Le(sh.offset);
+      out.appendUint64Le(sh.size);
+      out.appendUint32Le(sh.link);
+      out.appendUint32Le(sh.info);
+      out.appendUint64Le(sh.addralign);
+      out.appendUint64Le(sh.entsize);
     };
 
     // null section
@@ -584,6 +1025,7 @@ function lower(ast) {
     });
 
     // text section
+    const textIndex = sectionCount;
     writeSectionHeader(".text", {
       type: /*SHT_PROGBITS*/ 1,
       flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2),
@@ -596,38 +1038,72 @@ function lower(ast) {
       entsize: 0,
     });
 
+    const rel = new BufferBuilder();
+    for (const relocation of funcRelocations) {
+      let idx = symbols.findIndex((sym) => sym.name === relocation.symbol);
+      if (idx === -1) {
+        idx = symbols.length;
+        symbols.push({
+          name: relocation.symbol,
+          type: SYMBOL_TYPES.STT_NOTYPE,
+          binding: SYMBOL_BINDING.STB_GLOBAL,
+          visibility: SYMBOL_VISIBILITY.STV_DEFAULT,
+          sectionIndex: 0,
+          value: 0,
+          size: 0,
+        });
+      }
+      // r_offset
+      rel.appendUint64Le(relocation.offset);
+      // r_info type,sym
+      rel.appendUint32Le(relocation.kind);
+      rel.appendUint32Le(idx);
+      // r_addend
+      rel.appendUint64Le(relocation.addend);
+    }
+    const symtabIndex = sectionCount + 1;
+    writeSectionHeader(".rela", {
+      type: /*SHT_RELA*/ 4,
+      flags: 0,
+      addr: 0,
+      offset: 0,
+      size: rel.length,
+      link: symtabIndex,
+      info: textIndex,
+      addralign: 8,
+      entsize: 24,
+    });
+
     const symtab = new BufferBuilder();
     const nameToSymIdx = new Map();
     let symIdx = 0;
-    for (const sym of textRelativeSymbols) {
+    for (const sym of symbols) {
       const nameIdx = strs.pushAndGet(sym.name);
+      symtab.appendUint32Le(nameIdx);
+      symtab.appendUint8(sym.type | (sym.binding << 4));
+      symtab.appendUint8(sym.visibility);
+      symtab.appendUint16Le(sym.sectionIndex);
+      symtab.appendUint64Le(sym.value);
+      symtab.appendUint64Le(sym.size);
 
-      symtab.append([
-        ...littleEndian32(nameIdx),
-        /*STT_FUNC*/ 2 | /*STB_GLOBAL*/ (1 << 4),
-        /*STV_DEFAULT*/ 0,
-        /*shndx .text*/ ...littleEndian16(1),
-        /*value*/ ...littleEndian32(sym.offset),
-        ...[0, 0, 0, 0],
-        /*size*/ ...littleEndian32(sym.size),
-        ...[0, 0, 0, 0],
-      ]);
       nameToSymIdx.set(sym.name, symIdx);
       symIdx++;
     }
 
-    console.log(symtab);
-
     // symtab section
     const strTableIndex = sectionCount + 1;
+    const firstGlobal = symbols.findIndex(
+      (sym) => sym.binding === SYMBOL_BINDING.STB_GLOBAL
+    );
+    assert(firstGlobal !== -1);
     writeSectionHeader(".symtab", {
       type: /*SHT_SYMTAB*/ 2,
       flags: 0,
       addr: 0,
       offset: 0,
-      size: symtab.buffer.length,
+      size: symtab.length,
       link: strTableIndex,
-      info: 0,
+      info: firstGlobal,
       addralign: 8,
       entsize: 24,
     });
@@ -638,20 +1114,17 @@ function lower(ast) {
       flags: 0,
       addr: 0,
       offset: 0,
-      size: strs.out.buffer.length,
+      size: strs.out.length,
       link: 0,
       info: 0,
       addralign: 1,
       entsize: 0,
     });
 
-    const shstrndx = littleEndian32(sectionCount);
-    out.buffer[shstrndxRef] = shstrndx[0];
-    out.buffer[shstrndxRef + 1] = shstrndx[1];
+    out.writeUint16Le(shstrndxRef, sectionCount);
 
-    const totalSectionCount = littleEndian32(sectionCount + 1);
-    out.buffer[shnumRef] = totalSectionCount[0];
-    out.buffer[shnumRef + 1] = totalSectionCount[1];
+    const totalSectionCount = sectionCount + 1;
+    out.writeUint16Le(shnumRef, totalSectionCount);
 
     // shstrtab section
     writeSectionHeader(".shstrtab", {
@@ -659,43 +1132,37 @@ function lower(ast) {
       flags: 0,
       addr: 0,
       offset: 0,
-      size: shstrs.out.buffer.length,
+      size: shstrs.out.length,
       link: 0,
       info: 0,
       addralign: 1,
       entsize: 0,
     });
 
-    const alignTo = (align) => {
-      assertDefined(align);
-      const up = align - (out.buffer.length % align);
-      out.append(Array(up).fill(0));
-    };
-
     const patch32 = (baseOffset, value) => {
       assertDefined(baseOffset, value);
-      const encoded = littleEndian32(value);
-      out.buffer[baseOffset] = encoded[0];
-      out.buffer[baseOffset + 1] = encoded[1];
-      out.buffer[baseOffset + 2] = encoded[2];
-      out.buffer[baseOffset + 3] = encoded[3];
+      out.writeUint32Le(baseOffset, value);
     };
 
-    alignTo(16);
+    alignTo(out, 16);
     patch32(sectionOffsetRefs[".text"], out.currentPos);
     out.append(textContent);
 
-    patch32(sectionOffsetRefs[".strtab"], out.currentPos);
-    out.append(strs.out.buffer);
+    alignTo(out, 8);
+    patch32(sectionOffsetRefs[".rela"], out.currentPos);
+    out.append(rel.toUint8Array());
 
-    alignTo(8);
+    patch32(sectionOffsetRefs[".strtab"], out.currentPos);
+    out.append(strs.out.toUint8Array());
+
+    alignTo(out, 8);
     patch32(sectionOffsetRefs[".symtab"], out.currentPos);
-    out.append(symtab.buffer);
+    out.append(symtab.toUint8Array());
 
     patch32(sectionOffsetRefs[".shstrtab"], out.currentPos);
-    out.append(shstrs.out.buffer);
+    out.append(shstrs.out.toUint8Array());
 
-    return out.buffer;
+    return out.toUint8Array();
   }
 
   const funcs = [];
@@ -705,29 +1172,60 @@ function lower(ast) {
     funcs.push({
       name: func.name.ident,
       code: ib.out,
+      relocations: ib.relocations,
     });
   }
 
-  console.log(funcs);
+  console.dir(funcs, { depth: 5 });
 
   const obj = generateObjectFile(funcs);
   return obj;
 }
 
+async function link(object) {
+  async function execWithForwardedOutput(command, args) {
+    return new Promise((resolve, reject) => {
+      const child = spawn(command, args, {
+        stdio: "inherit",
+      });
+      child.on("close", (code) => {
+        if (code === 0) {
+          resolve();
+        } else {
+          reject(new CompilerError("gcc failed to link", 0));
+        }
+      });
+    });
+  }
+
+  // we could use a temporary directory in the future, but let's keep this debuggable for now
+  const outputFile = "output.o";
+  await fs.writeFile(outputFile, object);
+
+  await execWithForwardedOutput("gcc", ["-Wl,-znoexecstack", outputFile]);
+  await execWithForwardedOutput("gdb", [
+    "--batch",
+    "--command",
+    "dump-main.gdb",
+    "a.out",
+  ]);
+}
+
 async function compile(input) {
   const tokens = lex(input);
-  console.log(tokens);
+  console.log("tokens", tokens);
 
   const ast = parse(tokens);
+  console.log("ast");
   console.dir(ast, { depth: 20 });
 
   const object = lower(ast);
-  fs.writeFile("output.o", object);
+  return link(object);
 }
 
 const fileName = process.argv[2];
 const input = await fs.readFile(fileName, "utf-8");
-console.log(input);
+console.log("input", input);
 
 try {
   await compile(input);
@@ -739,8 +1237,16 @@ try {
   }
 }
 
-function assertDefined(...values) {
-  if (values.some((value) => value === undefined || value === null)) {
-    throw new Error(`assertion failed, value undefined or null`);
+function assert(condition) {
+  if (!condition) {
+    throw new Error("assertion failed");
   }
 }
+
+function assertDefined(...values) {
+  values.forEach((value, i) => {
+    if (value === null || value === undefined) {
+      throw new Error(`assertion failed, argument ${i} undefined or null`);
+    }
+  });
+}
diff --git a/input.c b/input.c
index d11299b..03aabbd 100644
--- a/input.c
+++ b/input.c
@@ -1,6 +1,12 @@
-//#include
+// #include
 
-int main(int argc)
+int main(int argc, int argv)
 {
-    // exit(42);
+    int x = 100;
+    return thisismyfakeconstantbecauseidonthaveconstant(x - 1);
+}
+
+int thisismyfakeconstantbecauseidonthaveconstant(int x)
+{
+    return 1 + 1;
 }
diff --git a/shell.nix b/shell.nix
index d0e18d1..5709ffc 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,3 +1,3 @@
-{ pkgs ? import <nixpkgs> {} }: pkgs.mkShell {
-  nativeBuildInputs = with pkgs; [ nodejs_22 ];
+{ pkgs ? import <nixpkgs> { } }: pkgs.mkShell {
+  nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld llvmPackages_18.clang ];
 }