From 16e5e3377815c85c6100ccb2e2b052980bf5a5d7 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 22 Nov 2024 21:46:24 +0100 Subject: [PATCH 01/10] works --- .gitignore | 2 ++ index.js | 39 +++++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d7756c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +a.out +*.o diff --git a/index.js b/index.js index a7dce2a..435d6a4 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,4 @@ +import { spawn } from "node:child_process"; import fs from "node:fs/promises"; // https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf @@ -6,9 +7,8 @@ import fs from "node:fs/promises"; class CompilerError extends Error { constructor(message, span) { super(message); - if (!span) { - throw new Error("span must be present"); - } + assertDefined(message); + assertDefined(span); this.span = span; } @@ -358,7 +358,7 @@ function lower(ast) { const REG_IGNORED = 0; function modRm(mod, rm, reg) { - return (mod >> 6) | rm | (reg << 3); + return (mod << 6) | rm | (reg << 3); } class InstBuilder { @@ -376,7 +376,7 @@ function lower(ast) { movEaxImm32(imm) { // mov eax, imm this.#append([ - 0xC7, + 0xc7, modRm(MOD_REG, RM_A, REG_IGNORED), ...littleEndian32(imm), ]); @@ -422,8 +422,6 @@ function lower(ast) { const ib = new InstBuilder(); for (const stmt of func.body) { - console.log("doing cg for", stmt.kind); - switch (stmt.kind) { case "expr": { codegenExpr(ib, stmt.expr); @@ -616,8 +614,6 @@ function lower(ast) { symIdx++; } - console.log(symtab); - // symtab section const strTableIndex = sectionCount + 1; writeSectionHeader(".symtab", { @@ -715,6 +711,29 @@ function lower(ast) { return obj; } +function link(object) { + // we could use a temporary directory in the future, but let's keep this debuggable for now + const outputFile = "output.o"; + fs.writeFile(outputFile, object); + + return new Promise((resolve, reject) => { + const gcc = spawn("gcc", [outputFile]); + gcc.stdout.on("data", (data) => { + process.stdout.write(data); + }); + gcc.stderr.on("data", (data) => { + process.stderr.write(data); + }); + gcc.on("close", (code) => { + if (code === 0) { + resolve(); + } else { + reject(new CompilerError("gcc failed to link", 0)); + } + }) + }); +} + async function compile(input) { const tokens = lex(input); console.log(tokens); @@ -722,7 +741,7 @@ async function compile(input) { console.dir(ast, { depth: 20 }); const object = lower(ast); - fs.writeFile("output.o", object); + return link(object); } const fileName = process.argv[2]; From f22293c78e026795ab2c8c2ce8676fc6ec53f620 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 22 Nov 2024 23:34:37 +0100 Subject: [PATCH 02/10] exit --- index.js | 267 ++++++++++++++++++++++++++++++++++++++++++++---------- input.c | 2 +- shell.nix | 4 +- 3 files changed, 221 insertions(+), 52 deletions(-) diff --git a/index.js b/index.js index 435d6a4..0f74013 100644 --- a/index.js +++ b/index.js @@ -33,8 +33,16 @@ class CompilerError extends Error { } function lex(input) { - function alphabetic(char) { - return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z"); + // 6.4.2 Identifiers + function identifierStart(char) { + return ( + (char >= "a" && char <= "z") || + (char >= "A" && char <= "Z") || + char === "_" + ); + } + function identifierCont(char) { + return identifierStart(char) || (char >= "0" && char <= "9"); } const tokens = []; @@ -63,10 +71,10 @@ function lex(input) { integer: Number(number), span, }); - } else if (alphabetic(head)) { + } else if (identifierStart(head)) { const span = i - 1; let ident = head; - while (alphabetic(input[i])) { + while (identifierCont(input[i])) { ident += input[i]; i++; } @@ -346,17 +354,64 @@ function lower(ast) { (number >> 24) & 0xff, ]; } + function littleEndian64(number) { + assertDefined(number); + assert(number <= 0xff_ff_ff_ff); + return [...littleEndian32(number), 0, 0, 0, 0]; + } + function signedLittleEndian64(number) { + assertDefined(number); + assert(number <= 0xff_ff_ff_ff); + assert(number >= -(0xff_ff_ff_ff + 1)); + const array = littleEndian64(number); + const signBit = array[3] & 0b10000000; + if (signBit) { + array[4] = 0xff; + array[5] = 0xff; + array[6] = 0xff; + array[7] = 0xff; + } + console.log(array); + return array; + } + + const RELOCATIONS = { + R_X86_64_PC32: 2, + }; + const SYMBOL_TYPES = { + STT_NOTYPE: 0, + STT_FUNC: 2, + }; + const SYMBOL_BINDING = { + STB_GLOBAL: 1, + }; + const SYMBOL_VISIBILITY = { + STV_DEFAULT: 0, + }; + + // 2.1.3 ModR/M and SIB Bytes const MOD_REG = 0b11; const RM_A = 0b000; const RM_C = 0b001; + const RM_D = 0b010; + const RM_B = 0b011; + const RM_SP = 0b100; + const RM_BP = 0b101; + const RM_SI = 0b110; + const RM_DI = 0b111; const REG_A = RM_A; const REG_C = RM_C; + const REG_D = RM_D; + const REG_B = RM_B; + const REG_SP = RM_SP; + const REG_BP = RM_BP; + const REG_SI = RM_SI; + const REG_DI = RM_DI; const REG_IGNORED = 0; - function modRm(mod, rm, reg) { return (mod << 6) | rm | (reg << 3); } @@ -365,6 +420,7 @@ function lower(ast) { #stackSize; constructor() { this.out = new Uint8Array(); + this.relocations = []; this.#stackSize = 0; } @@ -382,6 +438,25 @@ function lower(ast) { ]); } + movEaxToEdi() { + // mov edi, eax ; Move r/m32 to r32 + this.#append([0x8b, modRm(MOD_REG, RM_A, RM_DI)]); + } + + call(symbol) { + // call rel32 ; Call near, relative, displacement relative to next + // ; instruction. 32-bit displacement sign extended to + // ; 64-bits in 64-bit mode + this.#append([0xe8]); + this.relocations.push({ + kind: RELOCATIONS.R_X86_64_PC32, + symbol, + offset: this.out.length, + addend: -4, + }); + this.#append([0x0, 0x0, 0x0, 0x0]); + } + ret() { // ret ; near return to calling prodecude this.#append([0xc3]); @@ -402,7 +477,9 @@ function lower(ast) { throw new Error("bad"); } - const arg0 = codegenExpr(ib, expr.args[0]); + codegenExpr(ib, expr.args[0]); + ib.movEaxToEdi(); + ib.call(expr.lhs.string); break; } @@ -445,6 +522,7 @@ function lower(ast) { } append(array) { assertDefined(array); + array.forEach((elem) => assert(typeof elem === "number")); this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]); } get currentPos() { @@ -453,18 +531,69 @@ function lower(ast) { } function generateObjectFile(funcs) { - if (funcs.length !== 1) { - throw new Error("bad"); + const alignTo = (out, align) => { + assertDefined(out, align); + const missing = out.buffer.length % align; + if (missing === 0) { + return; + } + const up = align - missing; + out.append(Array(up).fill(0)); + }; + + function layoutFuncs(funcs) { + const textContent = new BufferBuilder(); + + const textRelativeSymbols = []; + const relocations = []; + + funcs.forEach((func) => { + alignTo(textContent, 8); // i think this is not actually necessary. + const offset = textContent.buffer.length; + textRelativeSymbols.push({ + name: func.name, + offset, + size: func.code.length, + }); + relocations.push( + ...func.relocations.map((relocation) => ({ + kind: relocation.kind, + symbol: relocation.symbol, + addend: relocation.addend, + offset: offset + relocation.offset, + })) + ); + textContent.append(func.code); + }); + + return { + textContent: textContent.buffer, + textRelativeSymbols, + relocations, + }; } - const textContent = funcs[0].code; - const textRelativeSymbols = [ - { - name: funcs[0].name, - offset: 0, - size: funcs[0].code.length, - }, - ]; + const symbols = []; + + const { + textContent, + textRelativeSymbols, + relocations: funcRelocations, + } = layoutFuncs(funcs); + + for (const sym of textRelativeSymbols) { + symbols.push({ + name: sym.name, + type: SYMBOL_TYPES.STT_FUNC, + binding: SYMBOL_BINDING.STB_GLOBAL, + visibility: SYMBOL_VISIBILITY.STV_DEFAULT, + sectionIndex: 1 /*.text*/, + value: sym.offset, + size: sym.size, + }); + } + + console.log(funcRelocations); let out = new BufferBuilder(); // ident @@ -548,23 +677,17 @@ function lower(ast) { out.append([ ...littleEndian32(nameIndex), ...littleEndian32(sh.type), - ...littleEndian32(sh.flags), - ...[0, 0, 0, 0], // flag pad - ...littleEndian32(sh.addr), - ...[0, 0, 0, 0], + ...littleEndian64(sh.flags), + ...littleEndian64(sh.addr), ]); sectionOffsetRefs[name] = out.currentPos; out.append([ - ...littleEndian32(sh.offset), - ...[0, 0, 0, 0], - ...littleEndian32(sh.size), - ...[0, 0, 0, 0], + ...littleEndian64(sh.offset), + ...littleEndian64(sh.size), ...littleEndian32(sh.link), ...littleEndian32(sh.info), - ...littleEndian32(sh.addralign), - ...[0, 0, 0, 0], - ...littleEndian32(sh.entsize), - ...[0, 0, 0, 0], + ...littleEndian64(sh.addralign), + ...littleEndian64(sh.entsize), ]); }; @@ -582,6 +705,8 @@ function lower(ast) { }); // text section + const textIndex = sectionCount; + console.log(textContent); writeSectionHeader(".text", { type: /*SHT_PROGBITS*/ 1, flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2), @@ -594,21 +719,58 @@ function lower(ast) { entsize: 0, }); + const rel = new BufferBuilder(); + for (const relocation of funcRelocations) { + let idx = symbols.findIndex((sym) => sym.name === relocation.symbol); + if (idx === -1) { + idx = symbols.length; + symbols.push({ + name: relocation.symbol, + type: SYMBOL_TYPES.STT_NOTYPE, + binding: SYMBOL_BINDING.STB_GLOBAL, + visibility: SYMBOL_VISIBILITY.STV_DEFAULT, + sectionIndex: 0, + value: 0, + size: 0, + }); + } + console.log(rel.buffer.length); + // r_offset + rel.append([...littleEndian32(relocation.offset), ...[0, 0, 0, 0]]); + // r_info type,sym + rel.append(littleEndian32(relocation.kind)); + rel.append(littleEndian32(idx)); + // r_addend + rel.append(signedLittleEndian64(relocation.addend)); + } + console.log(symbols, rel.buffer.length); + const symtabIndex = sectionCount + 1; + console.log("text", textIndex); + writeSectionHeader(".rela", { + type: /*SHT_RELA*/ 4, + flags: 0, + addr: 0, + offset: 0, + size: rel.buffer.length, + link: symtabIndex, + info: textIndex, + addralign: 8, + entsize: 24, + }); + const symtab = new BufferBuilder(); const nameToSymIdx = new Map(); let symIdx = 0; - for (const sym of textRelativeSymbols) { + for (const sym of symbols) { const nameIdx = strs.pushAndGet(sym.name); symtab.append([ ...littleEndian32(nameIdx), - /*STT_FUNC*/ 2 | /*STB_GLOBAL*/ (1 << 4), - /*STV_DEFAULT*/ 0, - /*shndx .text*/ ...littleEndian16(1), - /*value*/ ...littleEndian32(sym.offset), - ...[0, 0, 0, 0], - /*size*/ ...littleEndian32(sym.size), - ...[0, 0, 0, 0], + sym.type | (sym.binding << 4), + sym.visibility, + /*shndx*/ ...littleEndian16(sym.sectionIndex), + /*value*/ ...littleEndian64(sym.value), + /*size*/ ...littleEndian64(sym.size), ]); nameToSymIdx.set(sym.name, symIdx); symIdx++; @@ -662,12 +824,6 @@ function lower(ast) { entsize: 0, }); - const alignTo = (align) => { - assertDefined(align); - const up = align - (out.buffer.length % align); - out.append(Array(up).fill(0)); - }; - const patch32 = (baseOffset, value) => { assertDefined(baseOffset, value); const encoded = littleEndian32(value); @@ -677,14 +833,18 @@ function lower(ast) { out.buffer[baseOffset + 3] = encoded[3]; }; - alignTo(16); + alignTo(out, 16); patch32(sectionOffsetRefs[".text"], out.currentPos); out.append(textContent); + alignTo(out, 8); + patch32(sectionOffsetRefs[".rela"], out.currentPos); + out.append(rel.buffer); + patch32(sectionOffsetRefs[".strtab"], out.currentPos); out.append(strs.out.buffer); - alignTo(8); + alignTo(out, 8); patch32(sectionOffsetRefs[".symtab"], out.currentPos); out.append(symtab.buffer); @@ -701,10 +861,11 @@ function lower(ast) { funcs.push({ name: func.name.ident, code: ib.out, + relocations: ib.relocations, }); } - console.log(funcs); + console.dir(funcs, { depth: 5 }); const obj = generateObjectFile(funcs); @@ -730,7 +891,7 @@ function link(object) { } else { reject(new CompilerError("gcc failed to link", 0)); } - }) + }); }); } @@ -758,8 +919,16 @@ try { } } -function assertDefined(...values) { - if (values.some((value) => value === undefined || value === null)) { - throw new Error(`assertion failed, value undefined or null`); +function assert(condition) { + if (!condition) { + throw new Error("assertion failed"); } } + +function assertDefined(...values) { + values.forEach((value, i) => { + if (value === null || value === undefined) { + throw new Error(`assertion failed, argument ${i} undefined or null`); + } + }); +} diff --git a/input.c b/input.c index d11299b..a2a12a6 100644 --- a/input.c +++ b/input.c @@ -2,5 +2,5 @@ int main(int argc) { - // exit(42); + exit(42); } diff --git a/shell.nix b/shell.nix index d0e18d1..da05ddd 100644 --- a/shell.nix +++ b/shell.nix @@ -1,3 +1,3 @@ -{ pkgs ? import {} }: pkgs.mkShell { - nativeBuildInputs = with pkgs; [ nodejs_22 ]; +{ pkgs ? import { } }: pkgs.mkShell { + nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld ]; } From 923e6f23526e8669341ad4ee427c28d5545b57fd Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 22 Nov 2024 23:54:09 +0100 Subject: [PATCH 03/10] things --- index.js | 50 +++++++++++++++++++++++++++++++++++++++----------- input.c | 9 +++++++-- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/index.js b/index.js index 0f74013..cc2b86e 100644 --- a/index.js +++ b/index.js @@ -269,19 +269,46 @@ function parse(tokens) { return parseComma(tok); } + function parseStatement(tok) { + switch (tok.peek()?.kind) { + case "ident": { + switch (tok.peek()?.ident) { + case "return": { + const span = tok.next("return").span; + let rhs = undefined; + if (tok.peek()?.kind !== ";") { + rhs = parseExpr(tok); + } + + return { + kind: "return", + rhs, + span, + }; + } + default: { + // fallthrough + } + } + } + default: { + const expr = parseExpr(tok); + return { + kind: "expr", + expr, + span: expr.span, + }; + } + } + } + function parseBlock(tok) { tok.expect("{", "start of block"); const statements = []; while (tok.peek()?.kind !== "}") { - // TODO: non-expression statements - const expr = parseExpr(tok); - statements.push({ - kind: "expr", - expr, - span: expr.span, - }); + statements.push(parseStatement(tok)); tok.expect(";", "end of statement"); } @@ -477,6 +504,7 @@ function lower(ast) { throw new Error("bad"); } + // TODO: save codegenExpr(ib, expr.args[0]); ib.movEaxToEdi(); ib.call(expr.lhs.string); @@ -505,7 +533,10 @@ function lower(ast) { break; } default: { - throw new Error(`unsupported stmt: ${stmt.kind}`); + if (stmt.rhs) { + codegenExpr(ib, stmt.rhs); + } + ib.ret(); } } } @@ -706,7 +737,6 @@ function lower(ast) { // text section const textIndex = sectionCount; - console.log(textContent); writeSectionHeader(".text", { type: /*SHT_PROGBITS*/ 1, flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2), @@ -743,9 +773,7 @@ function lower(ast) { // r_addend rel.append(signedLittleEndian64(relocation.addend)); } - console.log(symbols, rel.buffer.length); const symtabIndex = sectionCount + 1; - console.log("text", textIndex); writeSectionHeader(".rela", { type: /*SHT_RELA*/ 4, flags: 0, diff --git a/input.c b/input.c index a2a12a6..8d8b15a 100644 --- a/input.c +++ b/input.c @@ -1,6 +1,11 @@ -//#include +// #include int main(int argc) { - exit(42); + exit(thisismyfakeconstantbecauseidonthaveconstant(1)); +} + +int thisismyfakeconstantbecauseidonthaveconstant(int x) +{ + return 9; } From 767cebafd811c123141615279c7fda207c9f7a8e Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Sat, 23 Nov 2024 00:04:53 +0100 Subject: [PATCH 04/10] more --- index.js | 22 ++++++++++++++++++++-- input.c | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index cc2b86e..d7ccfb1 100644 --- a/index.js +++ b/index.js @@ -103,7 +103,8 @@ function lex(input) { case "{": case "}": case ",": - case ";": { + case ";": + case "+": { tokens.push({ kind: head, span: i - 1, @@ -258,8 +259,23 @@ function parse(tokens) { return lhs; } - const parseLogicalAnd = generateBinaryParser(["&&"], parsePostfix); + const parseMultiplicative = generateBinaryParser( + ["*", "/", "%"], + parsePostfix + ); + const parseAdditive = generateBinaryParser(["+", "-"], parseMultiplicative); + const parseShift = generateBinaryParser(["<<", ">>"], parseAdditive); + const parseRelational = generateBinaryParser( + ["<", ">", "<=", ">="], + parseShift + ); + const parseEquality = generateBinaryParser(["==", "!="], parseRelational); + const parseAnd = generateBinaryParser(["&"], parseEquality); + const parseExclusiveOr = generateBinaryParser(["^"], parseAnd); + const parseInclusiveOr = generateBinaryParser(["|"], parseExclusiveOr); + const parseLogicalAnd = generateBinaryParser(["&&"], parseInclusiveOr); const parseLogicalOr = generateBinaryParser(["||"], parseLogicalAnd); + // TODO conditional operator const parseAssignment = generateBinaryParser( ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", , "^=", "|="], parseLogicalOr @@ -516,6 +532,8 @@ function lower(ast) { break; } case "+": { + codegenExpr(ib, expr.rhs); + assert(false); } default: { throw new Error(`unsupported expr: ${expr.kind}`); diff --git a/input.c b/input.c index 8d8b15a..c2d11ff 100644 --- a/input.c +++ b/input.c @@ -7,5 +7,5 @@ int main(int argc) int thisismyfakeconstantbecauseidonthaveconstant(int x) { - return 9; + return 1 + 1; } From c0a2c8d1aac1892a9e9dca584280344ef25aeed9 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:07:29 +0100 Subject: [PATCH 05/10] prepare stack --- dump-main.gdb | 3 + index.js | 151 ++++++++++++++++++++++++++++++++++++++++---------- input.c | 5 +- 3 files changed, 129 insertions(+), 30 deletions(-) create mode 100644 dump-main.gdb diff --git a/dump-main.gdb b/dump-main.gdb new file mode 100644 index 0000000..2994383 --- /dev/null +++ b/dump-main.gdb @@ -0,0 +1,3 @@ +set disassembly-flavor intel +set style enabled on +disas main diff --git a/index.js b/index.js index d7ccfb1..02b3c17 100644 --- a/index.js +++ b/index.js @@ -415,7 +415,6 @@ function lower(ast) { array[6] = 0xff; array[7] = 0xff; } - console.log(array); return array; } @@ -456,15 +455,70 @@ function lower(ast) { const REG_IGNORED = 0; function modRm(mod, rm, reg) { + assert(mod <= 0b11); + assert(rm <= 0b111); + assert(reg <= 0b111); return (mod << 6) | rm | (reg << 3); } + const REX = { + W_OPERAND_SIZE_DETERMINED: 0, + W_64_BIT_OPERAND_SIZE: 1, + }; + + function rex(w, r, x, b) { + assert(w <= 1); + assert(r <= 1); + assert(x <= 1); + assert(b <= 1); + return 0b0100_0000 | (w << 3) | (r << 2) | (x << 1) | b; + } + class InstBuilder { + /** + * The reserved stack space for locals and intermediary values. + * todo todo something + */ #stackSize; + #patches; constructor() { this.out = new Uint8Array(); this.relocations = []; this.#stackSize = 0; + this.#patches = []; + + this.#prologue(); + } + + #prologue() { + // push rbp + this.pushReg64(REG_BP); // push british petroleum + // sub rsp, SIZE + this.subImm(REG_SP, 0); + this.#patches.push({ + start: this.out.length - 4, + patch: () => littleEndian32(this.#stackSize), + }); + // mov rbp, rsp + this.movRegReg64(REG_BP, REG_SP); + } + + #epilogue() { + // mov rsp, rbp + this.movRegReg64(REG_SP, REG_BP); + // pop rbp + this.popReg64(REG_BP); + } + + finish() { + this.#epilogue(); + this.#patches.forEach((patch) => { + const result = patch.patch(); + assert(Array.isArray(result)); + result.forEach((v, i) => { + this.out[patch.start + i] = v; + }); + }); } reserveStack(size) { @@ -481,9 +535,38 @@ function lower(ast) { ]); } - movEaxToEdi() { - // mov edi, eax ; Move r/m32 to r32 - this.#append([0x8b, modRm(MOD_REG, RM_A, RM_DI)]); + movRegReg32(to, from) { + // ; Move r/m32 to r32 + this.#append([0x8b, modRm(MOD_REG, from, to)]); + } + + movRegReg64(to, from) { + // ; Move r/m64 to r64. + this.#append([ + rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), + 0x8b, + modRm(MOD_REG, from, to), + ]); + } + + pushReg64(reg) { + // 58+rd ; Push r64. + this.#append([0x50 | reg]); + } + + popReg64(reg) { + // 50+rd ; Pop top of stack into r64; increment stack pointer. + this.#append([0x58 | reg]); + } + + subImm(reg, imm) { + // REX.W + 81 /5 id ; Subtract imm32 sign-extended to 64-bits from r/m64. + this.#append([ + rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), + 0x81, + modRm(MOD_REG, reg, 5 /* /5*/), + ...littleEndian32(imm), + ]); } call(symbol) { @@ -522,7 +605,8 @@ function lower(ast) { // TODO: save codegenExpr(ib, expr.args[0]); - ib.movEaxToEdi(); + // mov edi, eax + ib.movRegReg32(REG_DI, REG_A); ib.call(expr.lhs.string); break; @@ -550,16 +634,22 @@ function lower(ast) { codegenExpr(ib, stmt.expr); break; } - default: { + case "return": { if (stmt.rhs) { codegenExpr(ib, stmt.rhs); } + ib.finish(); ib.ret(); + break; + } + default: { + assert(false); } } } ib.movEaxImm32(0); + ib.finish(); ib.ret(); return ib; @@ -642,7 +732,7 @@ function lower(ast) { }); } - console.log(funcRelocations); + console.log("relocations", funcRelocations); let out = new BufferBuilder(); // ident @@ -782,7 +872,6 @@ function lower(ast) { size: 0, }); } - console.log(rel.buffer.length); // r_offset rel.append([...littleEndian32(relocation.offset), ...[0, 0, 0, 0]]); // r_info type,sym @@ -918,34 +1007,40 @@ function lower(ast) { return obj; } -function link(object) { +async function link(object) { + async function execWithForwardedOutput(command, args) { + return new Promise((resolve, reject) => { + const child = spawn(command, args, { + stdio: "inherit", + }); + child.on("close", (code) => { + if (code === 0) { + resolve(); + } else { + reject(new CompilerError("gcc failed to link", 0)); + } + }); + }); + } + // we could use a temporary directory in the future, but let's keep this debuggable for now const outputFile = "output.o"; fs.writeFile(outputFile, object); - return new Promise((resolve, reject) => { - const gcc = spawn("gcc", [outputFile]); - gcc.stdout.on("data", (data) => { - process.stdout.write(data); - }); - gcc.stderr.on("data", (data) => { - process.stderr.write(data); - }); - gcc.on("close", (code) => { - if (code === 0) { - resolve(); - } else { - reject(new CompilerError("gcc failed to link", 0)); - } - }); - }); + await execWithForwardedOutput("gcc", [outputFile]); + await execWithForwardedOutput("gdb", [ + "--batch", + "--command", + "dump-main.gdb", + "a.out", + ]); } async function compile(input) { const tokens = lex(input); - console.log(tokens); + console.log("tokens", tokens); const ast = parse(tokens); - console.dir(ast, { depth: 20 }); + console.dir("ast", ast, { depth: 20 }); const object = lower(ast); return link(object); @@ -953,7 +1048,7 @@ async function compile(input) { const fileName = process.argv[2]; const input = await fs.readFile(fileName, "utf-8"); -console.log(input); +console.log("input", input); try { await compile(input); diff --git a/input.c b/input.c index c2d11ff..a7e8632 100644 --- a/input.c +++ b/input.c @@ -2,10 +2,11 @@ int main(int argc) { - exit(thisismyfakeconstantbecauseidonthaveconstant(1)); + thisismyfakeconstantbecauseidonthaveconstant(1); + exit(42); } int thisismyfakeconstantbecauseidonthaveconstant(int x) { - return 1 + 1; + // return 1 + 1; } From 96f2dcf8afe46b98288ea3e39d58257958009e88 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:14:42 +0100 Subject: [PATCH 06/10] GLORY TO SHADOWING --- index.js | 130 +++++++++++++++++++++++++++++++++++++++++++++++-------- input.c | 7 +-- 2 files changed, 116 insertions(+), 21 deletions(-) diff --git a/index.js b/index.js index 02b3c17..9a05890 100644 --- a/index.js +++ b/index.js @@ -104,7 +104,8 @@ function lex(input) { case "}": case ",": case ";": - case "+": { + case "+": + case "=": { tokens.push({ kind: head, span: i - 1, @@ -302,6 +303,25 @@ function parse(tokens) { span, }; } + case "int": { + const typeSpan = tok.next("ident").span; + const name = tok.next("ident"); + + tok.next("="); + + let init; + if (tok.peek()?.kind !== ";") { + init = parseExpr(tok); + } + + return { + kind: "declaration", + type: "int", + name: name.ident, + span: typeSpan, + init, + }; + } default: { // fallthrough } @@ -382,14 +402,20 @@ function lower(ast) { x86-64 codegen. strategy: every expression returns its result in rax. + + useful: + - https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html + - https://gitlab.com/x86-psABIs/x86-64-ABI */ function littleEndian16(number) { assertDefined(number); + assert(typeof number === "number"); return [number & 0xff, (number >> 8) & 0xff]; } function littleEndian32(number) { assertDefined(number); + assert(typeof number === "number"); return [ number & 0xff, (number >> 8) & 0xff, @@ -400,12 +426,14 @@ function lower(ast) { function littleEndian64(number) { assertDefined(number); assert(number <= 0xff_ff_ff_ff); + assert(typeof number === "number"); return [...littleEndian32(number), 0, 0, 0, 0]; } function signedLittleEndian64(number) { assertDefined(number); assert(number <= 0xff_ff_ff_ff); assert(number >= -(0xff_ff_ff_ff + 1)); + assert(typeof number === "number"); const array = littleEndian64(number); const signBit = array[3] & 0b10000000; @@ -454,11 +482,11 @@ function lower(ast) { const REG_DI = RM_DI; const REG_IGNORED = 0; - function modRm(mod, rm, reg) { + function modRm(mod, reg, rm) { assert(mod <= 0b11); assert(rm <= 0b111); assert(reg <= 0b111); - return (mod << 6) | rm | (reg << 3); + return (mod << 6) | (reg << 3) | rm; } const REX = { @@ -477,7 +505,7 @@ function lower(ast) { class InstBuilder { /** * The reserved stack space for locals and intermediary values. - * todo todo something + * We subtract rsp by this value in the beginning. */ #stackSize; #patches; @@ -493,14 +521,14 @@ function lower(ast) { #prologue() { // push rbp this.pushReg64(REG_BP); // push british petroleum + // mov rbp, rsp + this.movRegReg64(REG_BP, REG_SP); // sub rsp, SIZE this.subImm(REG_SP, 0); this.#patches.push({ start: this.out.length - 4, patch: () => littleEndian32(this.#stackSize), }); - // mov rbp, rsp - this.movRegReg64(REG_BP, REG_SP); } #epilogue() { @@ -522,22 +550,56 @@ function lower(ast) { } reserveStack(size) { + const prev = this.#stackSize; this.#stackSize += size; - return this.#stackSize; + return prev; + } + + addRegs(reg1, reg2) { + // REX.W + 03 /r | ADD r64, r/m64 ; Add r/m64 to r64 + this.#append([ + rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), + 0x03, + modRm(MOD_REG, reg1, reg2), + ]); + } + + movRegToStackOffset(offset, reg) { + // mov [rsp+{offset}], reg + // 89 /r, MOV r/m64, r64 + this.#append([ + 0x89, + // [--][--]+disp32 + modRm(0b10, reg, 0b100), + 0x24, // SIB: Scaled index: none, Base: ESP + ...littleEndian32(offset), + ]); + } + + movStackOffsetToReg(offset, reg) { + // mov reg, [rsp+{offset}] + // 89 /r, MOV r/m64, r64 + this.#append([ + 0x8b, + // [--][--]+disp32 + modRm(0b10, reg, 0b100), + 0x24, // SIB: Scaled index: none, Base: ESP + ...littleEndian32(offset), + ]); } movEaxImm32(imm) { // mov eax, imm this.#append([ 0xc7, - modRm(MOD_REG, RM_A, REG_IGNORED), + modRm(MOD_REG, REG_IGNORED, RM_A), ...littleEndian32(imm), ]); } movRegReg32(to, from) { // ; Move r/m32 to r32 - this.#append([0x8b, modRm(MOD_REG, from, to)]); + this.#append([0x8b, modRm(MOD_REG, to, from)]); } movRegReg64(to, from) { @@ -545,7 +607,7 @@ function lower(ast) { this.#append([ rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), 0x8b, - modRm(MOD_REG, from, to), + modRm(MOD_REG, to, from), ]); } @@ -564,7 +626,7 @@ function lower(ast) { this.#append([ rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), 0x81, - modRm(MOD_REG, reg, 5 /* /5*/), + modRm(MOD_REG, 5 /* /5*/, reg), ...littleEndian32(imm), ]); } @@ -593,7 +655,8 @@ function lower(ast) { } } - function codegenExpr(ib, expr) { + function codegenExpr(ctx, expr) { + const { ib, variables } = ctx; switch (expr.kind) { case "call": { if (expr.lhs.kind !== "ident") { @@ -604,7 +667,7 @@ function lower(ast) { } // TODO: save - codegenExpr(ib, expr.args[0]); + codegenExpr(ctx, expr.args[0]); // mov edi, eax ib.movRegReg32(REG_DI, REG_A); ib.call(expr.lhs.string); @@ -615,9 +678,24 @@ function lower(ast) { ib.movEaxImm32(expr.integer); break; } + case "ident": { + const offset = [...variables] + .reverse() + .find((v) => v.name === expr.string); + assert(offset); + ib.movStackOffsetToReg(offset.stackOffset, REG_A); + break; + } case "+": { - codegenExpr(ib, expr.rhs); - assert(false); + // For binary expressions, we first evaluate the LHS, save it on the stack, + // evaluate the RHS, then restore the LHS and perform the operation. + codegenExpr(ctx, expr.lhs); + ib.pushReg64(REG_A); // push rax + codegenExpr(ctx, expr.rhs); + ib.popReg64(REG_C); // pop rcx + + ib.addRegs(REG_A, REG_C); + break; } default: { throw new Error(`unsupported expr: ${expr.kind}`); @@ -627,21 +705,36 @@ function lower(ast) { function codegenFunction(func) { const ib = new InstBuilder(); + const variables = []; for (const stmt of func.body) { switch (stmt.kind) { case "expr": { - codegenExpr(ib, stmt.expr); + codegenExpr({ ib, variables }, stmt.expr); break; } case "return": { if (stmt.rhs) { - codegenExpr(ib, stmt.rhs); + codegenExpr({ ib, variables }, stmt.rhs); } ib.finish(); ib.ret(); break; } + case "declaration": { + assert(stmt.type === "int"); + if (stmt.init) { + codegenExpr({ ib, variables }, stmt.init); + } + const slot = ib.reserveStack(4); + variables.push({ + name: stmt.name, + stackOffset: slot, + }); + // mov [rsp+{slot}], eax + ib.movRegToStackOffset(slot, REG_A); + break; + } default: { assert(false); } @@ -1040,7 +1133,8 @@ async function compile(input) { const tokens = lex(input); console.log("tokens", tokens); const ast = parse(tokens); - console.dir("ast", ast, { depth: 20 }); + console.log("ast"); + console.dir(ast, { depth: 20 }); const object = lower(ast); return link(object); diff --git a/input.c b/input.c index a7e8632..f754aeb 100644 --- a/input.c +++ b/input.c @@ -2,11 +2,12 @@ int main(int argc) { - thisismyfakeconstantbecauseidonthaveconstant(1); - exit(42); + int x = 100; + int x = 200; + return x; } int thisismyfakeconstantbecauseidonthaveconstant(int x) { - // return 1 + 1; + return 1 + 1; } From 741b1000b3bbe2e3b3087450bd28c31f66fcf4d3 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 29 Nov 2024 22:50:26 +0100 Subject: [PATCH 07/10] ASLR in action --- index.js | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- input.c | 6 +++--- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/index.js b/index.js index 9a05890..0151e5e 100644 --- a/index.js +++ b/index.js @@ -105,6 +105,7 @@ function lex(input) { case ",": case ";": case "+": + case "-": case "=": { tokens.push({ kind: head, @@ -362,6 +363,9 @@ function parse(tokens) { const params = []; while (tok.peek()?.kind !== ")") { + if (params.length > 0) { + tok.expect(",", "function parameter separator"); + } const type = parseType(tok, "function parameter"); const name = tok.next("function name"); if (name.kind !== "ident") { @@ -382,8 +386,9 @@ function parse(tokens) { return { kind: "function", - ret, name, + params, + ret, body, }; } @@ -481,6 +486,8 @@ function lower(ast) { const REG_SI = RM_SI; const REG_DI = RM_DI; + const PARAM_CALLCONV_REGISTERS = [REG_DI, REG_SI, REG_D, REG_C]; + const REG_IGNORED = 0; function modRm(mod, reg, rm) { assert(mod <= 0b11); @@ -564,6 +571,15 @@ function lower(ast) { ]); } + subRegs(reg1, reg2) { + // REX.W + 03 /r | ADD r64, r/m64 ; Add r/m64 to r64 + this.#append([ + rex(REX.W_64_BIT_OPERAND_SIZE, 0, 0, 0), + 0x2b, + modRm(MOD_REG, reg1, reg2), + ]); + } + movRegToStackOffset(offset, reg) { // mov [rsp+{offset}], reg // 89 /r, MOV r/m64, r64 @@ -656,6 +672,7 @@ function lower(ast) { } function codegenExpr(ctx, expr) { + assert(!Number.isNaN(ctx.offset)); const { ib, variables } = ctx; switch (expr.kind) { case "call": { @@ -683,7 +700,7 @@ function lower(ast) { .reverse() .find((v) => v.name === expr.string); assert(offset); - ib.movStackOffsetToReg(offset.stackOffset, REG_A); + ib.movStackOffsetToReg(offset.stackOffset + ctx.offset, REG_A); break; } case "+": { @@ -691,12 +708,21 @@ function lower(ast) { // evaluate the RHS, then restore the LHS and perform the operation. codegenExpr(ctx, expr.lhs); ib.pushReg64(REG_A); // push rax - codegenExpr(ctx, expr.rhs); + codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.rhs); ib.popReg64(REG_C); // pop rcx ib.addRegs(REG_A, REG_C); break; } + case "-": { + codegenExpr(ctx, expr.rhs); + ib.pushReg64(REG_A); // push rax + codegenExpr({ ...ctx, offset: ctx.offset + 8 }, expr.lhs); + ib.popReg64(REG_C); // pop rcx + + ib.subRegs(REG_A, REG_C); + break; + } default: { throw new Error(`unsupported expr: ${expr.kind}`); } @@ -707,15 +733,28 @@ function lower(ast) { const ib = new InstBuilder(); const variables = []; + assert(func.params.length <= 4); + + func.params.forEach((param, i) => { + assert(param.type.kind === "int"); + const offset = ib.reserveStack(4); + ib.movRegToStackOffset(offset, PARAM_CALLCONV_REGISTERS[i]); + variables.push({ + name: param.name, + stackOffset: offset, + }); + }); + for (const stmt of func.body) { + const ctx = { ib, variables, offset: 0 }; switch (stmt.kind) { case "expr": { - codegenExpr({ ib, variables }, stmt.expr); + codegenExpr(ctx, stmt.expr); break; } case "return": { if (stmt.rhs) { - codegenExpr({ ib, variables }, stmt.rhs); + codegenExpr(ctx, stmt.rhs); } ib.finish(); ib.ret(); @@ -724,7 +763,7 @@ function lower(ast) { case "declaration": { assert(stmt.type === "int"); if (stmt.init) { - codegenExpr({ ib, variables }, stmt.init); + codegenExpr(ctx, stmt.init); } const slot = ib.reserveStack(4); variables.push({ @@ -1163,7 +1202,7 @@ function assert(condition) { function assertDefined(...values) { values.forEach((value, i) => { if (value === null || value === undefined) { - throw new Error(`assertion failed, argument ${i} undefined or null`); + throw new Error(`assertion failed, argument ${i} undefined or nu ll`); } }); } diff --git a/input.c b/input.c index f754aeb..77c38f7 100644 --- a/input.c +++ b/input.c @@ -1,10 +1,10 @@ // #include -int main(int argc) +int main(int argc, int argv) { int x = 100; - int x = 200; - return x; + thisismyfakeconstantbecauseidonthaveconstant(x - 1); + return argv; } int thisismyfakeconstantbecauseidonthaveconstant(int x) From b2d506b2aef46bb2f9b96639492f74669e1cc9ee Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Fri, 29 Nov 2024 23:10:22 +0100 Subject: [PATCH 08/10] ship it --- .gitignore | 1 + index.js | 24 ++++++++++++++++++++---- input.c | 3 +-- shell.nix | 2 +- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index d7756c2..6ce8b08 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ a.out *.o +.vscode diff --git a/index.js b/index.js index 0151e5e..c6a3593 100644 --- a/index.js +++ b/index.js @@ -453,12 +453,14 @@ function lower(ast) { const RELOCATIONS = { R_X86_64_PC32: 2, + R_X86_64_PLT32: 4, }; const SYMBOL_TYPES = { STT_NOTYPE: 0, STT_FUNC: 2, }; const SYMBOL_BINDING = { + STB_LOCAL: 0, STB_GLOBAL: 1, }; const SYMBOL_VISIBILITY = { @@ -653,7 +655,7 @@ function lower(ast) { // ; 64-bits in 64-bit mode this.#append([0xe8]); this.relocations.push({ - kind: RELOCATIONS.R_X86_64_PC32, + kind: RELOCATIONS.R_X86_64_PC32, // pietro said i should use this even though PC_32 works too symbol, offset: this.out.length, addend: -4, @@ -844,7 +846,17 @@ function lower(ast) { }; } - const symbols = []; + const symbols = [ + { + name: "", + type: SYMBOL_TYPES.STT_NOTYPE, + binding: SYMBOL_BINDING.STB_LOCAL, + visibility: SYMBOL_VISIBILITY.STV_DEFAULT, + sectionIndex: 0, + value: 0, + size: 0, + }, + ]; const { textContent, @@ -1045,6 +1057,10 @@ function lower(ast) { // symtab section const strTableIndex = sectionCount + 1; + const firstGlobal = symbols.findIndex( + (sym) => sym.binding === SYMBOL_BINDING.STB_GLOBAL + ); + assertDefined(firstGlobal); writeSectionHeader(".symtab", { type: /*SHT_SYMTAB*/ 2, flags: 0, @@ -1052,7 +1068,7 @@ function lower(ast) { offset: 0, size: symtab.buffer.length, link: strTableIndex, - info: 0, + info: firstGlobal, addralign: 8, entsize: 24, }); @@ -1159,7 +1175,7 @@ async function link(object) { const outputFile = "output.o"; fs.writeFile(outputFile, object); - await execWithForwardedOutput("gcc", [outputFile]); + await execWithForwardedOutput("gcc", ["-Wl,-znoexecstack", outputFile]); await execWithForwardedOutput("gdb", [ "--batch", "--command", diff --git a/input.c b/input.c index 77c38f7..03aabbd 100644 --- a/input.c +++ b/input.c @@ -3,8 +3,7 @@ int main(int argc, int argv) { int x = 100; - thisismyfakeconstantbecauseidonthaveconstant(x - 1); - return argv; + return thisismyfakeconstantbecauseidonthaveconstant(x - 1); } int thisismyfakeconstantbecauseidonthaveconstant(int x) diff --git a/shell.nix b/shell.nix index da05ddd..5709ffc 100644 --- a/shell.nix +++ b/shell.nix @@ -1,3 +1,3 @@ { pkgs ? import { } }: pkgs.mkShell { - nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld ]; + nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld llvmPackages_18.clang ]; } From dc6217d1b6d89fdf78ec65b4ebe8d966aef48515 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Sun, 12 Jan 2025 16:54:37 +0100 Subject: [PATCH 09/10] start using growable array buffer to make it BLAZINGLY EFFICIENT --- index.js | 75 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/index.js b/index.js index c6a3593..fe30df2 100644 --- a/index.js +++ b/index.js @@ -4,6 +4,8 @@ import fs from "node:fs/promises"; // https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf // yep. +const BUFFER_LE = true; + class CompilerError extends Error { constructor(message, span) { super(message); @@ -790,23 +792,43 @@ function lower(ast) { } class BufferBuilder { + #buffer constructor() { - this.buffer = new Uint8Array(); + this.#buffer = new ArrayBuffer(0, {maxByteLength: 2**32}); } append(array) { assertDefined(array); array.forEach((elem) => assert(typeof elem === "number")); - this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]); + + const oldLength = this.length; + const newLength = oldLength + array.length; + this.#buffer.resize(newLength); + + for (let i = 0; i < array.length; i++) { + new DataView(this.#buffer).setUint8(oldLength + i, array[i]); + } + } + get length() { + return this.#buffer.byteLength; } get currentPos() { - return this.buffer.length; + return this.length; + } + writeUint16Le(offset, int) { + new DataView(this.#buffer).setUint16(offset, int, BUFFER_LE); + } + writeUint32Le(offset, int) { + new DataView(this.#buffer).setUint32(offset, int, BUFFER_LE); + } + toUint8Array() { + return new Uint8Array(this.#buffer); } } function generateObjectFile(funcs) { const alignTo = (out, align) => { assertDefined(out, align); - const missing = out.buffer.length % align; + const missing = out.length % align; if (missing === 0) { return; } @@ -822,7 +844,7 @@ function lower(ast) { funcs.forEach((func) => { alignTo(textContent, 8); // i think this is not actually necessary. - const offset = textContent.buffer.length; + const offset = textContent.length; textRelativeSymbols.push({ name: func.name, offset, @@ -840,7 +862,7 @@ function lower(ast) { }); return { - textContent: textContent.buffer, + textContent: textContent.toUint8Array(), textRelativeSymbols, relocations, }; @@ -923,11 +945,7 @@ function lower(ast) { // Let's write some section headers. - const shoff = littleEndian32(out.currentPos); - out.buffer[shoffRef] = shoff[0]; - out.buffer[shoffRef + 1] = shoff[1]; - out.buffer[shoffRef + 2] = shoff[2]; - out.buffer[shoffRef + 3] = shoff[3]; + out.writeUint32Le(shoffRef, out.currentPos); class NullTerminatedStringStore { #offsets; @@ -939,7 +957,7 @@ function lower(ast) { if (this.#offsets.has(str)) { return this.#offsets.get(str); } - const offset = this.out.buffer.length; + const offset = this.out.length; this.#offsets.set(str, offset); this.out.append(new TextEncoder("utf-8").encode(str)); this.out.append([0]); @@ -1030,7 +1048,7 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: rel.buffer.length, + size: rel.length, link: symtabIndex, info: textIndex, addralign: 8, @@ -1066,7 +1084,7 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: symtab.buffer.length, + size: symtab.length, link: strTableIndex, info: firstGlobal, addralign: 8, @@ -1079,20 +1097,17 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: strs.out.buffer.length, + size: strs.out.length, link: 0, info: 0, addralign: 1, entsize: 0, }); - const shstrndx = littleEndian32(sectionCount); - out.buffer[shstrndxRef] = shstrndx[0]; - out.buffer[shstrndxRef + 1] = shstrndx[1]; + out.writeUint16Le(shstrndxRef, sectionCount); - const totalSectionCount = littleEndian32(sectionCount + 1); - out.buffer[shnumRef] = totalSectionCount[0]; - out.buffer[shnumRef + 1] = totalSectionCount[1]; + const totalSectionCount = sectionCount + 1; + out.writeUint16Le(shnumRef, totalSectionCount); // shstrtab section writeSectionHeader(".shstrtab", { @@ -1100,7 +1115,7 @@ function lower(ast) { flags: 0, addr: 0, offset: 0, - size: shstrs.out.buffer.length, + size: shstrs.out.length, link: 0, info: 0, addralign: 1, @@ -1109,11 +1124,7 @@ function lower(ast) { const patch32 = (baseOffset, value) => { assertDefined(baseOffset, value); - const encoded = littleEndian32(value); - out.buffer[baseOffset] = encoded[0]; - out.buffer[baseOffset + 1] = encoded[1]; - out.buffer[baseOffset + 2] = encoded[2]; - out.buffer[baseOffset + 3] = encoded[3]; + out.writeUint32Le(baseOffset, value); }; alignTo(out, 16); @@ -1122,19 +1133,19 @@ function lower(ast) { alignTo(out, 8); patch32(sectionOffsetRefs[".rela"], out.currentPos); - out.append(rel.buffer); + out.append(rel.toUint8Array()); patch32(sectionOffsetRefs[".strtab"], out.currentPos); - out.append(strs.out.buffer); + out.append(strs.out.toUint8Array()); alignTo(out, 8); patch32(sectionOffsetRefs[".symtab"], out.currentPos); - out.append(symtab.buffer); + out.append(symtab.toUint8Array()); patch32(sectionOffsetRefs[".shstrtab"], out.currentPos); - out.append(shstrs.out.buffer); + out.append(shstrs.out.toUint8Array()); - return out.buffer; + return out.toUint8Array(); } const funcs = []; From ee5fcc2e24b3acb40f3131e6e4b6a18c8592a2b8 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Sun, 12 Jan 2025 17:07:01 +0100 Subject: [PATCH 10/10] improve --- index.js | 73 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/index.js b/index.js index fe30df2..d25511e 100644 --- a/index.js +++ b/index.js @@ -792,9 +792,30 @@ function lower(ast) { } class BufferBuilder { - #buffer + #buffer; constructor() { - this.#buffer = new ArrayBuffer(0, {maxByteLength: 2**32}); + this.#buffer = new ArrayBuffer(0, { maxByteLength: 2 ** 32 }); + } + #appendUintGeneric(size, name, int) { + assertDefined(int); + assert(typeof int === "number" || typeof int === "bigint"); + + const oldLength = this.length; + this.#buffer.resize(oldLength + size); + new DataView(this.#buffer)[name](oldLength, int, BUFFER_LE); + } + appendUint8(int) { + this.#appendUintGeneric(1, "setUint8", int); + } + appendUint16Le(int) { + this.#appendUintGeneric(2, "setUint16", int); + } + appendUint32Le(int) { + this.#appendUintGeneric(4, "setUint32", int); + } + appendUint64Le(int) { + assert(typeof int === "number"); + this.#appendUintGeneric(8, "setBigUint64", BigInt(int)); } append(array) { assertDefined(array); @@ -815,9 +836,11 @@ function lower(ast) { return this.length; } writeUint16Le(offset, int) { + assertDefined(offset, int); new DataView(this.#buffer).setUint16(offset, int, BUFFER_LE); } writeUint32Le(offset, int) { + assertDefined(offset, int); new DataView(this.#buffer).setUint32(offset, int, BUFFER_LE); } toUint8Array() { @@ -975,21 +998,17 @@ function lower(ast) { const writeSectionHeader = (name, sh) => { sectionCount++; const nameIndex = shstrs.pushAndGet(name); - out.append([ - ...littleEndian32(nameIndex), - ...littleEndian32(sh.type), - ...littleEndian64(sh.flags), - ...littleEndian64(sh.addr), - ]); + out.appendUint32Le(nameIndex); + out.appendUint32Le(sh.type); + out.appendUint64Le(sh.flags); + out.appendUint64Le(sh.addr); sectionOffsetRefs[name] = out.currentPos; - out.append([ - ...littleEndian64(sh.offset), - ...littleEndian64(sh.size), - ...littleEndian32(sh.link), - ...littleEndian32(sh.info), - ...littleEndian64(sh.addralign), - ...littleEndian64(sh.entsize), - ]); + out.appendUint64Le(sh.offset); + out.appendUint64Le(sh.size); + out.appendUint32Le(sh.link); + out.appendUint32Le(sh.info); + out.appendUint64Le(sh.addralign); + out.appendUint64Le(sh.entsize); }; // null section @@ -1035,12 +1054,12 @@ function lower(ast) { }); } // r_offset - rel.append([...littleEndian32(relocation.offset), ...[0, 0, 0, 0]]); + rel.appendUint64Le(relocation.offset); // r_info type,sym - rel.append(littleEndian32(relocation.kind)); - rel.append(littleEndian32(idx)); + rel.appendUint32Le(relocation.kind); + rel.appendUint32Le(idx); // r_addend - rel.append(signedLittleEndian64(relocation.addend)); + rel.appendUint64Le(relocation.addend); } const symtabIndex = sectionCount + 1; writeSectionHeader(".rela", { @@ -1060,15 +1079,13 @@ function lower(ast) { let symIdx = 0; for (const sym of symbols) { const nameIdx = strs.pushAndGet(sym.name); + symtab.appendUint32Le(nameIdx); + symtab.appendUint8(sym.type | (sym.binding << 4)); + symtab.appendUint8(sym.visibility); + symtab.appendUint16Le(sym.sectionIndex); + symtab.appendUint64Le(sym.value); + symtab.appendUint64Le(sym.size); - symtab.append([ - ...littleEndian32(nameIdx), - sym.type | (sym.binding << 4), - sym.visibility, - /*shndx*/ ...littleEndian16(sym.sectionIndex), - /*value*/ ...littleEndian64(sym.value), - /*size*/ ...littleEndian64(sym.size), - ]); nameToSymIdx.set(sym.name, symIdx); symIdx++; }