diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6ce8b08..0000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -a.out -*.o -.vscode diff --git a/dump-main.gdb b/dump-main.gdb deleted file mode 100644 index 2994383..0000000 --- a/dump-main.gdb +++ /dev/null @@ -1,3 +0,0 @@ -set disassembly-flavor intel -set style enabled on -disas main diff --git a/index.js b/index.js index d25511e..a7dce2a 100644 --- a/index.js +++ b/index.js @@ -1,16 +1,14 @@ -import { spawn } from "node:child_process"; import fs from "node:fs/promises"; // https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf // yep. -const BUFFER_LE = true; - class CompilerError extends Error { constructor(message, span) { super(message); - assertDefined(message); - assertDefined(span); + if (!span) { + throw new Error("span must be present"); + } this.span = span; } @@ -35,16 +33,8 @@ class CompilerError extends Error { } function lex(input) { - // 6.4.2 Identifiers - function identifierStart(char) { - return ( - (char >= "a" && char <= "z") || - (char >= "A" && char <= "Z") || - char === "_" - ); - } - function identifierCont(char) { - return identifierStart(char) || (char >= "0" && char <= "9"); + function alphabetic(char) { + return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z"); } const tokens = []; @@ -73,10 +63,10 @@ function lex(input) { integer: Number(number), span, }); - } else if (identifierStart(head)) { + } else if (alphabetic(head)) { const span = i - 1; let ident = head; - while (identifierCont(input[i])) { + while (alphabetic(input[i])) { ident += input[i]; i++; } @@ -105,10 +95,7 @@ function lex(input) { case "{": case "}": case ",": - case ";": - case "+": - case "-": - case "=": { + case ";": { tokens.push({ kind: head, span: i - 1, @@ -263,23 +250,8 @@ function parse(tokens) { return lhs; } - const parseMultiplicative = generateBinaryParser( - ["*", "/", "%"], - parsePostfix - ); - const parseAdditive = generateBinaryParser(["+", "-"], parseMultiplicative); - const parseShift = generateBinaryParser(["<<", ">>"], parseAdditive); - const parseRelational = generateBinaryParser( - ["<", ">", "<=", ">="], - parseShift - ); - const parseEquality = generateBinaryParser(["==", "!="], parseRelational); - const parseAnd = generateBinaryParser(["&"], parseEquality); - const parseExclusiveOr = generateBinaryParser(["^"], parseAnd); - const parseInclusiveOr = generateBinaryParser(["|"], parseExclusiveOr); - const parseLogicalAnd = generateBinaryParser(["&&"], parseInclusiveOr); + const parseLogicalAnd = generateBinaryParser(["&&"], parsePostfix); const parseLogicalOr = generateBinaryParser(["||"], parseLogicalAnd); - // TODO conditional operator const parseAssignment = generateBinaryParser( ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", , "^=", "|="], parseLogicalOr @@ -289,65 +261,19 @@ function parse(tokens) { return parseComma(tok); } - function parseStatement(tok) { - switch (tok.peek()?.kind) { - case "ident": { - switch (tok.peek()?.ident) { - case "return": { - const span = tok.next("return").span; - let rhs = undefined; - if (tok.peek()?.kind !== ";") { - rhs = parseExpr(tok); - } - - return { - kind: "return", - rhs, - span, - }; - } - case "int": { - const typeSpan = tok.next("ident").span; - const name = tok.next("ident"); - - tok.next("="); - - let init; - if (tok.peek()?.kind !== ";") { - init = parseExpr(tok); - } - - return { - kind: "declaration", - type: "int", - name: name.ident, - span: typeSpan, - init, - }; - } - default: { - // fallthrough - } - } - } - default: { - const expr = parseExpr(tok); - return { - kind: "expr", - expr, - span: expr.span, - }; - } - } - } - function parseBlock(tok) { tok.expect("{", "start of block"); const statements = []; while (tok.peek()?.kind !== "}") { - statements.push(parseStatement(tok)); + // TODO: non-expression statements + const expr = parseExpr(tok); + statements.push({ + kind: "expr", + expr, + span: expr.span, + }); tok.expect(";", "end of statement"); } @@ -365,9 +291,6 @@ function parse(tokens) { const params = []; while (tok.peek()?.kind !== ")") { - if (params.length > 0) { - tok.expect(",", "function parameter separator"); - } const type = parseType(tok, "function parameter"); const name = tok.next("function name"); if (name.kind !== "ident") { @@ -388,9 +311,8 @@ function parse(tokens) { return { kind: "function", - name, - params, ret, + name, body, }; } @@ -409,20 +331,14 @@ function lower(ast) { x86-64 codegen. strategy: every expression returns its result in rax. - - useful: - - https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html - - https://gitlab.com/x86-psABIs/x86-64-ABI */ function littleEndian16(number) { assertDefined(number); - assert(typeof number === "number"); return [number & 0xff, (number >> 8) & 0xff]; } function littleEndian32(number) { assertDefined(number); - assert(typeof number === "number"); return [ number & 0xff, (number >> 8) & 0xff, @@ -430,241 +346,42 @@ function lower(ast) { (number >> 24) & 0xff, ]; } - function littleEndian64(number) { - assertDefined(number); - assert(number <= 0xff_ff_ff_ff); - assert(typeof number === "number"); - return [...littleEndian32(number), 0, 0, 0, 0]; - } - function signedLittleEndian64(number) { - assertDefined(number); - assert(number <= 0xff_ff_ff_ff); - assert(number >= -(0xff_ff_ff_ff + 1)); - assert(typeof number === "number"); - const array = littleEndian64(number); - const signBit = array[3] & 0b10000000; - if (signBit) { - array[4] = 0xff; - array[5] = 0xff; - array[6] = 0xff; - array[7] = 0xff; - } - return array; - } - - const RELOCATIONS = { - R_X86_64_PC32: 2, - R_X86_64_PLT32: 4, - }; - const SYMBOL_TYPES = { - STT_NOTYPE: 0, - STT_FUNC: 2, - }; - const SYMBOL_BINDING = { - STB_LOCAL: 0, - STB_GLOBAL: 1, - }; - const SYMBOL_VISIBILITY = { - STV_DEFAULT: 0, - }; - - // 2.1.3 ModR/M and SIB Bytes const MOD_REG = 0b11; const RM_A = 0b000; const RM_C = 0b001; - const RM_D = 0b010; - const RM_B = 0b011; - const RM_SP = 0b100; - const RM_BP = 0b101; - const RM_SI = 0b110; - const RM_DI = 0b111; const REG_A = RM_A; const REG_C = RM_C; - const REG_D = RM_D; - const REG_B = RM_B; - const REG_SP = RM_SP; - const REG_BP = RM_BP; - const REG_SI = RM_SI; - const REG_DI = RM_DI; - - const PARAM_CALLCONV_REGISTERS = [REG_DI, REG_SI, REG_D, REG_C]; const REG_IGNORED = 0; - function modRm(mod, reg, rm) { - assert(mod <= 0b11); - assert(rm <= 0b111); - assert(reg <= 0b111); - return (mod << 6) | (reg << 3) | rm; - } - const REX = { - W_OPERAND_SIZE_DETERMINED: 0, - W_64_BIT_OPERAND_SIZE: 1, - }; - - function rex(w, r, x, b) { - assert(w <= 1); - assert(r <= 1); - assert(x <= 1); - assert(b <= 1); - return 0b0100_0000 | (w << 3) | (r << 2) | (x << 1) | b; + function modRm(mod, rm, reg) { + return (mod >> 6) | rm | (reg << 3); } class InstBuilder { - /** - * The reserved stack space for locals and intermediary values. - * We subtract rsp by this value in the beginning. - */ #stackSize; - #patches; constructor() { this.out = new Uint8Array(); - this.relocations = []; this.#stackSize = 0; - this.#patches = []; - - this.#prologue(); - } - - #prologue() { - // push rbp - this.pushReg64(REG_BP); // push british petroleum - // mov rbp, rsp - this.movRegReg64(REG_BP, REG_SP); - // sub rsp, SIZE - this.subImm(REG_SP, 0); - this.#patches.push({ - start: this.out.length - 4, - patch: () => littleEndian32(this.#stackSize), - }); - } - - #epilogue() { - // mov rsp, rbp - this.movRegReg64(REG_SP, REG_BP); - // pop rbp - this.popReg64(REG_BP); - } - - finish() { - this.#epilogue(); - this.#patches.forEach((patch) => { - const result = patch.patch(); - assert(Array.isArray(result)); - result.forEach((v, i) => { - this.out[patch.start + i] = v; - }); - }); } reserveStack(size) { - const prev = this.#stackSize; this.#stackSize += size; - return prev; - } - - addRegs(reg1, reg2) { - // REX.W + 03 /r | ADD r64, r/m64 ; Add r/m64 to r64 - this.#append([ - rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), - 0x03, - modRm(MOD_REG, reg1, reg2), - ]); - } - - subRegs(reg1, reg2) { - // REX.W + 03 /r | ADD r64, r/m64 ; Add r/m64 to r64 - this.#append([ - rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), - 0x2b, - modRm(MOD_REG, reg1, reg2), - ]); - } - - movRegToStackOffset(offset, reg) { - // mov [rsp+{offset}], reg - // 89 /r, MOV r/m64, r64 - this.#append([ - 0x89, - // [--][--]+disp32 - modRm(0b10, reg, 0b100), - 0x24, // SIB: Scaled index: none, Base: ESP - ...littleEndian32(offset), - ]); - } - - movStackOffsetToReg(offset, reg) { - // mov reg, [rsp+{offset}] - // 89 /r, MOV r/m64, r64 - this.#append([ - 0x8b, - // [--][--]+disp32 - modRm(0b10, reg, 0b100), - 0x24, // SIB: Scaled index: none, Base: ESP - ...littleEndian32(offset), - ]); + return this.#stackSize; } movEaxImm32(imm) { // mov eax, imm this.#append([ - 0xc7, - modRm(MOD_REG, REG_IGNORED, RM_A), + 0xC7, + modRm(MOD_REG, RM_A, REG_IGNORED), ...littleEndian32(imm), ]); } - movRegReg32(to, from) { - // ; Move r/m32 to r32 - this.#append([0x8b, modRm(MOD_REG, to, from)]); - } - - movRegReg64(to, from) { - // ; Move r/m64 to r64. - this.#append([ - rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), - 0x8b, - modRm(MOD_REG, to, from), - ]); - } - - pushReg64(reg) { - // 58+rd ; Push r64. - this.#append([0x50 | reg]); - } - - popReg64(reg) { - // 50+rd ; Pop top of stack into r64; increment stack pointer. - this.#append([0x58 | reg]); - } - - subImm(reg, imm) { - // REX.W + 81 /5 id ; Subtract imm32 sign-extended to 64-bits from r/m64. - this.#append([ - rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), - 0x81, - modRm(MOD_REG, 5 /* /5*/, reg), - ...littleEndian32(imm), - ]); - } - - call(symbol) { - // call rel32 ; Call near, relative, displacement relative to next - // ; instruction. 32-bit displacement sign extended to - // ; 64-bits in 64-bit mode - this.#append([0xe8]); - this.relocations.push({ - kind: RELOCATIONS.R_X86_64_PC32, // pietro said i should use this even though PC_32 works too - symbol, - offset: this.out.length, - addend: -4, - }); - this.#append([0x0, 0x0, 0x0, 0x0]); - } - ret() { // ret ; near return to calling prodecude this.#append([0xc3]); @@ -675,9 +392,7 @@ function lower(ast) { } } - function codegenExpr(ctx, expr) { - assert(!Number.isNaN(ctx.offset)); - const { ib, variables } = ctx; + function codegenExpr(ib, expr) { switch (expr.kind) { case "call": { if (expr.lhs.kind !== "ident") { @@ -687,11 +402,7 @@ function lower(ast) { throw new Error("bad"); } - // TODO: save - codegenExpr(ctx, expr.args[0]); - // mov edi, eax - ib.movRegReg32(REG_DI, REG_A); - ib.call(expr.lhs.string); + const arg0 = codegenExpr(ib, expr.args[0]); break; } @@ -699,33 +410,7 @@ function lower(ast) { ib.movEaxImm32(expr.integer); break; } - case "ident": { - const offset = [...variables] - .reverse() - .find((v) => v.name === expr.string); - assert(offset); - ib.movStackOffsetToReg(offset.stackOffset + ctx.offset, REG_A); - break; - } case "+": { - // For binary expressions, we first evaluate the LHS, save it on the stack, - // evaluate the RHS, then restore the LHS and perform the operation. - codegenExpr(ctx, expr.lhs); - ib.pushReg64(REG_A); // push rax - codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.rhs); - ib.popReg64(REG_C); // pop rcx - - ib.addRegs(REG_A, REG_C); - break; - } - case "-": { - codegenExpr(ctx, expr.rhs); - ib.pushReg64(REG_A); // push rax - codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.lhs); - ib.popReg64(REG_C); // pop rcx - - ib.subRegs(REG_A, REG_C); - break; } default: { throw new Error(`unsupported expr: ${expr.kind}`); @@ -735,194 +420,54 @@ function lower(ast) { function codegenFunction(func) { const ib = new InstBuilder(); - const variables = []; - - assert(func.params.length <= 4); - - func.params.forEach((param, i) => { - assert(param.type.kind === "int"); - const offset = ib.reserveStack(4); - ib.movRegToStackOffset(offset, PARAM_CALLCONV_REGISTERS[i]); - variables.push({ - name: param.name, - stackOffset: offset, - }); - }); for (const stmt of func.body) { - const ctx = { ib, variables, offset: 0 }; + console.log("doing cg for", stmt.kind); + switch (stmt.kind) { case "expr": { - codegenExpr(ctx, stmt.expr); - break; - } - case "return": { - if (stmt.rhs) { - codegenExpr(ctx, stmt.rhs); - } - ib.finish(); - ib.ret(); - break; - } - case "declaration": { - assert(stmt.type === "int"); - if (stmt.init) { - codegenExpr(ctx, stmt.init); - } - const slot = ib.reserveStack(4); - variables.push({ - name: stmt.name, - stackOffset: slot, - }); - // mov [rsp+{slot}], eax - ib.movRegToStackOffset(slot, REG_A); + codegenExpr(ib, stmt.expr); break; } default: { - assert(false); + throw new Error(`unsupported stmt: ${stmt.kind}`); } } } ib.movEaxImm32(0); - ib.finish(); ib.ret(); return ib; } class BufferBuilder { - #buffer; constructor() { - this.#buffer = new ArrayBuffer(0, { maxByteLength: 2 ** 32 }); - } - #appendUintGeneric(size, name, int) { - assertDefined(int); - assert(typeof int === "number" || typeof int === "bigint"); - - const oldLength = this.length; - this.#buffer.resize(oldLength + size); - new DataView(this.#buffer)[name](oldLength, int, BUFFER_LE); - } - appendUint8(int) { - this.#appendUintGeneric(1, "setUint8", int); - } - appendUint16Le(int) { - this.#appendUintGeneric(2, "setUint16", int); - } - appendUint32Le(int) { - this.#appendUintGeneric(4, "setUint32", int); - } - appendUint64Le(int) { - assert(typeof int === "number"); - this.#appendUintGeneric(8, "setBigUint64", BigInt(int)); + this.buffer = new Uint8Array(); } append(array) { assertDefined(array); - array.forEach((elem) => assert(typeof elem === "number")); - - const oldLength = this.length; - const newLength = oldLength + array.length; - this.#buffer.resize(newLength); - - for (let i = 0; i < array.length; i++) { - new DataView(this.#buffer).setUint8(oldLength + i, array[i]); - } - } - get length() { - return this.#buffer.byteLength; + this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]); } get currentPos() { - return this.length; - } - writeUint16Le(offset, int) { - assertDefined(offset, int); - new DataView(this.#buffer).setUint16(offset, int, BUFFER_LE); - } - writeUint32Le(offset, int) { - assertDefined(offset, int); - new DataView(this.#buffer).setUint32(offset, int, BUFFER_LE); - } - toUint8Array() { - return new Uint8Array(this.#buffer); + return this.buffer.length; } } function generateObjectFile(funcs) { - const alignTo = (out, align) => { - assertDefined(out, align); - const missing = out.length % align; - if (missing === 0) { - return; - } - const up = align - missing; - out.append(Array(up).fill(0)); - }; - - function layoutFuncs(funcs) { - const textContent = new BufferBuilder(); - - const textRelativeSymbols = []; - const relocations = []; - - funcs.forEach((func) => { - alignTo(textContent, 8); // i think this is not actually necessary. - const offset = textContent.length; - textRelativeSymbols.push({ - name: func.name, - offset, - size: func.code.length, - }); - relocations.push( - ...func.relocations.map((relocation) => ({ - kind: relocation.kind, - symbol: relocation.symbol, - addend: relocation.addend, - offset: offset + relocation.offset, - })) - ); - textContent.append(func.code); - }); - - return { - textContent: textContent.toUint8Array(), - textRelativeSymbols, - relocations, - }; + if (funcs.length !== 1) { + throw new Error("bad"); } - const symbols = [ + const textContent = funcs[0].code; + const textRelativeSymbols = [ { - name: "", - type: SYMBOL_TYPES.STT_NOTYPE, - binding: SYMBOL_BINDING.STB_LOCAL, - visibility: SYMBOL_VISIBILITY.STV_DEFAULT, - sectionIndex: 0, - value: 0, - size: 0, + name: funcs[0].name, + offset: 0, + size: funcs[0].code.length, }, ]; - const { - textContent, - textRelativeSymbols, - relocations: funcRelocations, - } = layoutFuncs(funcs); - - for (const sym of textRelativeSymbols) { - symbols.push({ - name: sym.name, - type: SYMBOL_TYPES.STT_FUNC, - binding: SYMBOL_BINDING.STB_GLOBAL, - visibility: SYMBOL_VISIBILITY.STV_DEFAULT, - sectionIndex: 1 /*.text*/, - value: sym.offset, - size: sym.size, - }); - } - - console.log("relocations", funcRelocations); - let out = new BufferBuilder(); // ident out.append([0x7f, "E".charCodeAt(0), "L".charCodeAt(0), "F".charCodeAt(0)]); @@ -968,7 +513,11 @@ function lower(ast) { // Let's write some section headers. - out.writeUint32Le(shoffRef, out.currentPos); + const shoff = littleEndian32(out.currentPos); + out.buffer[shoffRef] = shoff[0]; + out.buffer[shoffRef + 1] = shoff[1]; + out.buffer[shoffRef + 2] = shoff[2]; + out.buffer[shoffRef + 3] = shoff[3]; class NullTerminatedStringStore { #offsets; @@ -980,7 +529,7 @@ function lower(ast) { if (this.#offsets.has(str)) { return this.#offsets.get(str); } - const offset = this.out.length; + const offset = this.out.buffer.length; this.#offsets.set(str, offset); this.out.append(new TextEncoder("utf-8").encode(str)); this.out.append([0]); @@ -998,17 +547,27 @@ function lower(ast) { const writeSectionHeader = (name, sh) => { sectionCount++; const nameIndex = shstrs.pushAndGet(name); - out.appendUint32Le(nameIndex); - out.appendUint32Le(sh.type); - out.appendUint64Le(sh.flags); - out.appendUint64Le(sh.addr); + out.append([ + ...littleEndian32(nameIndex), + ...littleEndian32(sh.type), + ...littleEndian32(sh.flags), + ...[0, 0, 0, 0], // flag pad + ...littleEndian32(sh.addr), + ...[0, 0, 0, 0], + ]); sectionOffsetRefs[name] = out.currentPos; - out.appendUint64Le(sh.offset); - out.appendUint64Le(sh.size); - out.appendUint32Le(sh.link); - out.appendUint32Le(sh.info); - out.appendUint64Le(sh.addralign); - out.appendUint64Le(sh.entsize); + out.append([ + ...littleEndian32(sh.offset), + ...[0, 0, 0, 0], + ...littleEndian32(sh.size), + ...[0, 0, 0, 0], + ...littleEndian32(sh.link), + ...littleEndian32(sh.info), + ...littleEndian32(sh.addralign), + ...[0, 0, 0, 0], + ...littleEndian32(sh.entsize), + ...[0, 0, 0, 0], + ]); }; // null section @@ -1025,7 +584,6 @@ function lower(ast) { }); // text section - const textIndex = sectionCount; writeSectionHeader(".text", { type: /*SHT_PROGBITS*/ 1, flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2), @@ -1038,72 +596,38 @@ function lower(ast) { entsize: 0, }); - const rel = new BufferBuilder(); - for (const relocation of funcRelocations) { - let idx = symbols.findIndex((sym) => sym.name === relocation.symbol); - if (idx === -1) { - idx = symbols.length; - symbols.push({ - name: relocation.symbol, - type: SYMBOL_TYPES.STT_NOTYPE, - binding: SYMBOL_BINDING.STB_GLOBAL, - visibility: SYMBOL_VISIBILITY.STV_DEFAULT, - sectionIndex: 0, - value: 0, - size: 0, - }); - } - // r_offset - rel.appendUint64Le(relocation.offset); - // r_info type,sym - rel.appendUint32Le(relocation.kind); - rel.appendUint32Le(idx); - // r_addend - rel.appendUint64Le(relocation.addend); - } - const symtabIndex = sectionCount + 1; - writeSectionHeader(".rela", { - type: /*SHT_RELA*/ 4, - flags: 0, - addr: 0, - offset: 0, - size: rel.length, - link: symtabIndex, - info: textIndex, - addralign: 8, - entsize: 24, - }); - const symtab = new BufferBuilder(); const nameToSymIdx = new Map(); let symIdx = 0; - for (const sym of symbols) { + for (const sym of textRelativeSymbols) { const nameIdx = strs.pushAndGet(sym.name); - symtab.appendUint32Le(nameIdx); - symtab.appendUint8(sym.type | (sym.binding << 4)); - symtab.appendUint8(sym.visibility); - symtab.appendUint16Le(sym.sectionIndex); - symtab.appendUint64Le(sym.value); - symtab.appendUint64Le(sym.size); + symtab.append([ + ...littleEndian32(nameIdx), + /*STT_FUNC*/ 2 | /*STB_GLOBAL*/ (1 << 4), + /*STV_DEFAULT*/ 0, + /*shndx .text*/ ...littleEndian16(1), + /*value*/ ...littleEndian32(sym.offset), + ...[0, 0, 0, 0], + /*size*/ ...littleEndian32(sym.size), + ...[0, 0, 0, 0], + ]); nameToSymIdx.set(sym.name, symIdx); symIdx++; } + console.log(symtab); + // symtab section const strTableIndex = sectionCount + 1; - const firstGlobal = symbols.findIndex( - (sym) => sym.binding === SYMBOL_BINDING.STB_GLOBAL - ); - assertDefined(firstGlobal); writeSectionHeader(".symtab", { type: /*SHT_SYMTAB*/ 2, flags: 0, addr: 0, offset: 0, - size: symtab.length, + size: symtab.buffer.length, link: strTableIndex, - info: firstGlobal, + info: 0, addralign: 8, entsize: 24, }); @@ -1114,17 +638,20 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: strs.out.length, + size: strs.out.buffer.length, link: 0, info: 0, addralign: 1, entsize: 0, }); - out.writeUint16Le(shstrndxRef, sectionCount); + const shstrndx = littleEndian32(sectionCount); + out.buffer[shstrndxRef] = shstrndx[0]; + out.buffer[shstrndxRef + 1] = shstrndx[1]; - const totalSectionCount = sectionCount + 1; - out.writeUint16Le(shnumRef, totalSectionCount); + const totalSectionCount = littleEndian32(sectionCount + 1); + out.buffer[shnumRef] = totalSectionCount[0]; + out.buffer[shnumRef + 1] = totalSectionCount[1]; // shstrtab section writeSectionHeader(".shstrtab", { @@ -1132,37 +659,43 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: shstrs.out.length, + size: shstrs.out.buffer.length, link: 0, info: 0, addralign: 1, entsize: 0, }); - const patch32 = (baseOffset, value) => { - assertDefined(baseOffset, value); - out.writeUint32Le(baseOffset, value); + const alignTo = (align) => { + assertDefined(align); + const up = align - (out.buffer.length % align); + out.append(Array(up).fill(0)); }; - alignTo(out, 16); + const patch32 = (baseOffset, value) => { + assertDefined(baseOffset, value); + const encoded = littleEndian32(value); + out.buffer[baseOffset] = encoded[0]; + out.buffer[baseOffset + 1] = encoded[1]; + out.buffer[baseOffset + 2] = encoded[2]; + out.buffer[baseOffset + 3] = encoded[3]; + }; + + alignTo(16); patch32(sectionOffsetRefs[".text"], out.currentPos); out.append(textContent); - alignTo(out, 8); - patch32(sectionOffsetRefs[".rela"], out.currentPos); - out.append(rel.toUint8Array()); - patch32(sectionOffsetRefs[".strtab"], out.currentPos); - out.append(strs.out.toUint8Array()); + out.append(strs.out.buffer); - alignTo(out, 8); + alignTo(8); patch32(sectionOffsetRefs[".symtab"], out.currentPos); - out.append(symtab.toUint8Array()); + out.append(symtab.buffer); patch32(sectionOffsetRefs[".shstrtab"], out.currentPos); - out.append(shstrs.out.toUint8Array()); + out.append(shstrs.out.buffer); - return out.toUint8Array(); + return out.buffer; } const funcs = []; @@ -1172,60 +705,29 @@ function lower(ast) { funcs.push({ name: func.name.ident, code: ib.out, - relocations: ib.relocations, }); } - console.dir(funcs, { depth: 5 }); + console.log(funcs); const obj = generateObjectFile(funcs); return obj; } -async function link(object) { - async function execWithForwardedOutput(command, args) { - return new Promise((resolve, reject) => { - const child = spawn(command, args, { - stdio: "inherit", - }); - child.on("close", (code) => { - if (code === 0) { - resolve(); - } else { - reject(new CompilerError("gcc failed to link", 0)); - } - }); - }); - } - - // we could use a temporary directory in the future, but let's keep this debuggable for now - const outputFile = "output.o"; - fs.writeFile(outputFile, object); - - await execWithForwardedOutput("gcc", ["-Wl,-znoexecstack", outputFile]); - await execWithForwardedOutput("gdb", [ - "--batch", - "--command", - "dump-main.gdb", - "a.out", - ]); -} - async function compile(input) { const tokens = lex(input); - console.log("tokens", tokens); + console.log(tokens); const ast = parse(tokens); - console.log("ast"); console.dir(ast, { depth: 20 }); const object = lower(ast); - return link(object); + fs.writeFile("output.o", object); } const fileName = process.argv[2]; const input = await fs.readFile(fileName, "utf-8"); -console.log("input", input); +console.log(input); try { await compile(input); @@ -1237,16 +739,8 @@ try { } } -function assert(condition) { - if (!condition) { - throw new Error("assertion failed"); +function assertDefined(...values) { + if (values.some((value) => value === undefined || value === null)) { + throw new Error(`assertion failed, value undefined or null`); } } - -function assertDefined(...values) { - values.forEach((value, i) => { - if (value === null || value === undefined) { - throw new Error(`assertion failed, argument ${i} undefined or nu ll`); - } - }); -} diff --git a/input.c b/input.c index 03aabbd..d11299b 100644 --- a/input.c +++ b/input.c @@ -1,12 +1,6 @@ -// #include +//#include -int main(int argc, int argv) +int main(int argc) { - int x = 100; - return thisismyfakeconstantbecauseidonthaveconstant(x - 1); -} - -int thisismyfakeconstantbecauseidonthaveconstant(int x) -{ - return 1 + 1; + // exit(42); } diff --git a/shell.nix b/shell.nix index 5709ffc..d0e18d1 100644 --- a/shell.nix +++ b/shell.nix @@ -1,3 +1,3 @@ -{ pkgs ? import { } }: pkgs.mkShell { - nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld llvmPackages_18.clang ]; +{ pkgs ? import {} }: pkgs.mkShell { + nativeBuildInputs = with pkgs; [ nodejs_22 ]; }