This commit is contained in:
nora 2024-11-22 23:34:37 +01:00
parent 16e5e33778
commit f22293c78e
3 changed files with 221 additions and 52 deletions

267
index.js
View file

@ -33,8 +33,16 @@ class CompilerError extends Error {
} }
function lex(input) { function lex(input) {
function alphabetic(char) { // 6.4.2 Identifiers
return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z"); function identifierStart(char) {
return (
(char >= "a" && char <= "z") ||
(char >= "A" && char <= "Z") ||
char === "_"
);
}
function identifierCont(char) {
return identifierStart(char) || (char >= "0" && char <= "9");
} }
const tokens = []; const tokens = [];
@ -63,10 +71,10 @@ function lex(input) {
integer: Number(number), integer: Number(number),
span, span,
}); });
} else if (alphabetic(head)) { } else if (identifierStart(head)) {
const span = i - 1; const span = i - 1;
let ident = head; let ident = head;
while (alphabetic(input[i])) { while (identifierCont(input[i])) {
ident += input[i]; ident += input[i];
i++; i++;
} }
@ -346,17 +354,64 @@ function lower(ast) {
(number >> 24) & 0xff, (number >> 24) & 0xff,
]; ];
} }
/**
 * Encodes `number` as eight little-endian bytes.
 *
 * The low four bytes are produced by `littleEndian32`; the high four bytes
 * are always zero, which is why only values up to 0xffffffff are accepted.
 * Negative inputs are not rejected here and still get zero upper bytes.
 *
 * @param {number} number - value to encode; must not be null/undefined.
 * @returns {number[]} eight byte values, least significant byte first.
 * @throws {Error} if `number` is null/undefined or greater than 0xffffffff.
 */
function littleEndian64(number) {
  assertDefined(number);
  const MAX_U32 = 0xffffffff;
  assert(number <= MAX_U32);
  const bytes = [...littleEndian32(number)];
  // The upper 32 bits are never populated by this helper.
  bytes.push(0, 0, 0, 0);
  return bytes;
}
/**
 * Encodes a signed integer as eight little-endian two's-complement bytes.
 *
 * The low four bytes come from `littleEndian64` (which leaves the upper four
 * bytes zero); the upper four bytes are then set to 0xff when the value is
 * negative. The sign is taken from the number itself rather than from bit 31
 * of the low word, so the whole accepted range encodes correctly: positive
 * values with bit 31 set (e.g. 0x80000000) stay positive, and negative values
 * below -2^31 are still sign-extended.
 *
 * @param {number} number - integer in the range [-2^32, 2^32 - 1].
 * @returns {number[]} eight byte values, least significant byte first.
 * @throws {Error} if `number` is null/undefined or outside the accepted range.
 */
function signedLittleEndian64(number) {
  assertDefined(number);
  assert(number <= 0xff_ff_ff_ff);
  assert(number >= -(0xff_ff_ff_ff + 1));
  const array = littleEndian64(number);
  if (number < 0) {
    // Sign-extend: every bit above the low 32 is one for a negative value.
    array.fill(0xff, 4);
  }
  return array;
}
// Numeric codes written into the object file's relocation and symbol tables.
// The tables are frozen so a shared code cannot be reassigned by accident.

// Relocation kinds; the value is emitted as the type word of a relocation
// entry's r_info field.
const RELOCATIONS = Object.freeze({
  R_X86_64_PC32: 2,
});

// Symbol types; combined with the binding into a single symbol-table byte as
// `type | (binding << 4)`.
const SYMBOL_TYPES = Object.freeze({
  STT_NOTYPE: 0,
  STT_FUNC: 2,
});

// Symbol bindings; stored in the upper four bits of the type/binding byte.
const SYMBOL_BINDING = Object.freeze({
  STB_GLOBAL: 1,
});

// Symbol visibility; emitted as the byte that follows the type/binding byte.
const SYMBOL_VISIBILITY = Object.freeze({
  STV_DEFAULT: 0,
});
// 2.1.3 ModR/M and SIB Bytes
const MOD_REG = 0b11; const MOD_REG = 0b11;
const RM_A = 0b000; const RM_A = 0b000;
const RM_C = 0b001; const RM_C = 0b001;
// Remaining 3-bit register numbers for the `rm` field passed to modRm().
// NOTE(review): D/B/SP/BP/SI/DI presumably name edx/ebx/esp/ebp/esi/edi in
// the 32-bit forms used by this code generator — confirm against the Intel
// SDM register encoding table.
const RM_D = 0b010;
const RM_B = 0b011;
const RM_SP = 0b100;
const RM_BP = 0b101;
const RM_SI = 0b110;
const RM_DI = 0b111;
const REG_A = RM_A; const REG_A = RM_A;
const REG_C = RM_C; const REG_C = RM_C;
// `reg` field aliases: each one reuses the number of the matching RM_*
// constant, so a register has the same value in either ModR/M field.
const REG_D = RM_D;
const REG_B = RM_B;
const REG_SP = RM_SP;
const REG_BP = RM_BP;
const REG_SI = RM_SI;
const REG_DI = RM_DI;
const REG_IGNORED = 0; const REG_IGNORED = 0;
function modRm(mod, rm, reg) { function modRm(mod, rm, reg) {
return (mod << 6) | rm | (reg << 3); return (mod << 6) | rm | (reg << 3);
} }
@ -365,6 +420,7 @@ function lower(ast) {
#stackSize; #stackSize;
constructor() { constructor() {
this.out = new Uint8Array(); this.out = new Uint8Array();
this.relocations = [];
this.#stackSize = 0; this.#stackSize = 0;
} }
@ -382,6 +438,25 @@ function lower(ast) {
]); ]);
} }
movEaxToEdi() {
// mov edi, eax ; Move r/m32 to r32
this.#append([0x8b, modRm(MOD_REG, RM_A, RM_DI)]);
}
call(symbol) {
// call rel32 ; Call near, relative, displacement relative to next
// ; instruction. 32-bit displacement sign extended to
// ; 64-bits in 64-bit mode
this.#append([0xe8]);
this.relocations.push({
kind: RELOCATIONS.R_X86_64_PC32,
symbol,
offset: this.out.length,
addend: -4,
});
this.#append([0x0, 0x0, 0x0, 0x0]);
}
ret() { ret() {
// ret ; near return to calling procedure // ret ; near return to calling procedure
this.#append([0xc3]); this.#append([0xc3]);
@ -402,7 +477,9 @@ function lower(ast) {
throw new Error("bad"); throw new Error("bad");
} }
const arg0 = codegenExpr(ib, expr.args[0]); codegenExpr(ib, expr.args[0]);
ib.movEaxToEdi();
ib.call(expr.lhs.string);
break; break;
} }
@ -445,6 +522,7 @@ function lower(ast) {
} }
append(array) { append(array) {
assertDefined(array); assertDefined(array);
array.forEach((elem) => assert(typeof elem === "number"));
this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]); this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]);
} }
get currentPos() { get currentPos() {
@ -453,18 +531,69 @@ function lower(ast) {
} }
function generateObjectFile(funcs) { function generateObjectFile(funcs) {
if (funcs.length !== 1) { const alignTo = (out, align) => {
throw new Error("bad"); assertDefined(out, align);
const missing = out.buffer.length % align;
if (missing === 0) {
return;
}
const up = align - missing;
out.append(Array(up).fill(0));
};
// Lays out the machine code of every function back-to-back into one buffer.
//
// funcs: array of objects with `name`, `code` (the function's bytes) and
//        `relocations` (entries whose `offset` is relative to the start of
//        that function's code).
// Returns an object with:
//   textContent         - the concatenated (and padded) code buffer,
//   textRelativeSymbols - one { name, offset, size } per function, where
//                         `offset` is the function's start within textContent,
//   relocations         - every function's relocations with `offset` rebased
//                         to be relative to the start of textContent.
function layoutFuncs(funcs) {
const textContent = new BufferBuilder();
const textRelativeSymbols = [];
const relocations = [];
funcs.forEach((func) => {
// Zero-pad so each function starts on an 8-byte boundary.
alignTo(textContent, 8); // NOTE(review): the author suspects this padding is not actually necessary — confirm before removing.
// Start of this function within the combined buffer (taken after padding).
const offset = textContent.buffer.length;
textRelativeSymbols.push({
name: func.name,
offset,
size: func.code.length,
});
// Rebase function-relative relocation offsets onto the combined buffer.
relocations.push(
...func.relocations.map((relocation) => ({
kind: relocation.kind,
symbol: relocation.symbol,
addend: relocation.addend,
offset: offset + relocation.offset,
}))
);
textContent.append(func.code);
});
return {
textContent: textContent.buffer,
textRelativeSymbols,
relocations,
};
} }
const textContent = funcs[0].code; const symbols = [];
const textRelativeSymbols = [
{ const {
name: funcs[0].name, textContent,
offset: 0, textRelativeSymbols,
size: funcs[0].code.length, relocations: funcRelocations,
}, } = layoutFuncs(funcs);
];
for (const sym of textRelativeSymbols) {
symbols.push({
name: sym.name,
type: SYMBOL_TYPES.STT_FUNC,
binding: SYMBOL_BINDING.STB_GLOBAL,
visibility: SYMBOL_VISIBILITY.STV_DEFAULT,
sectionIndex: 1 /*.text*/,
value: sym.offset,
size: sym.size,
});
}
console.log(funcRelocations);
let out = new BufferBuilder(); let out = new BufferBuilder();
// ident // ident
@ -548,23 +677,17 @@ function lower(ast) {
out.append([ out.append([
...littleEndian32(nameIndex), ...littleEndian32(nameIndex),
...littleEndian32(sh.type), ...littleEndian32(sh.type),
...littleEndian32(sh.flags), ...littleEndian64(sh.flags),
...[0, 0, 0, 0], // flag pad ...littleEndian64(sh.addr),
...littleEndian32(sh.addr),
...[0, 0, 0, 0],
]); ]);
sectionOffsetRefs[name] = out.currentPos; sectionOffsetRefs[name] = out.currentPos;
out.append([ out.append([
...littleEndian32(sh.offset), ...littleEndian64(sh.offset),
...[0, 0, 0, 0], ...littleEndian64(sh.size),
...littleEndian32(sh.size),
...[0, 0, 0, 0],
...littleEndian32(sh.link), ...littleEndian32(sh.link),
...littleEndian32(sh.info), ...littleEndian32(sh.info),
...littleEndian32(sh.addralign), ...littleEndian64(sh.addralign),
...[0, 0, 0, 0], ...littleEndian64(sh.entsize),
...littleEndian32(sh.entsize),
...[0, 0, 0, 0],
]); ]);
}; };
@ -582,6 +705,8 @@ function lower(ast) {
}); });
// text section // text section
const textIndex = sectionCount;
console.log(textContent);
writeSectionHeader(".text", { writeSectionHeader(".text", {
type: /*SHT_PROGBITS*/ 1, type: /*SHT_PROGBITS*/ 1,
flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2), flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2),
@ -594,21 +719,58 @@ function lower(ast) {
entsize: 0, entsize: 0,
}); });
const rel = new BufferBuilder();
for (const relocation of funcRelocations) {
let idx = symbols.findIndex((sym) => sym.name === relocation.symbol);
if (idx === -1) {
idx = symbols.length;
symbols.push({
name: relocation.symbol,
type: SYMBOL_TYPES.STT_NOTYPE,
binding: SYMBOL_BINDING.STB_GLOBAL,
visibility: SYMBOL_VISIBILITY.STV_DEFAULT,
sectionIndex: 0,
value: 0,
size: 0,
});
}
console.log(rel.buffer.length);
// r_offset
rel.append([...littleEndian32(relocation.offset), ...[0, 0, 0, 0]]);
// r_info type,sym
rel.append(littleEndian32(relocation.kind));
rel.append(littleEndian32(idx));
// r_addend
rel.append(signedLittleEndian64(relocation.addend));
}
console.log(symbols, rel.buffer.length);
const symtabIndex = sectionCount + 1;
console.log("text", textIndex);
writeSectionHeader(".rela", {
type: /*SHT_RELA*/ 4,
flags: 0,
addr: 0,
offset: 0,
size: rel.buffer.length,
link: symtabIndex,
info: textIndex,
addralign: 8,
entsize: 24,
});
const symtab = new BufferBuilder(); const symtab = new BufferBuilder();
const nameToSymIdx = new Map(); const nameToSymIdx = new Map();
let symIdx = 0; let symIdx = 0;
for (const sym of textRelativeSymbols) { for (const sym of symbols) {
const nameIdx = strs.pushAndGet(sym.name); const nameIdx = strs.pushAndGet(sym.name);
symtab.append([ symtab.append([
...littleEndian32(nameIdx), ...littleEndian32(nameIdx),
/*STT_FUNC*/ 2 | /*STB_GLOBAL*/ (1 << 4), sym.type | (sym.binding << 4),
/*STV_DEFAULT*/ 0, sym.visibility,
/*shndx .text*/ ...littleEndian16(1), /*shndx*/ ...littleEndian16(sym.sectionIndex),
/*value*/ ...littleEndian32(sym.offset), /*value*/ ...littleEndian64(sym.value),
...[0, 0, 0, 0], /*size*/ ...littleEndian64(sym.size),
/*size*/ ...littleEndian32(sym.size),
...[0, 0, 0, 0],
]); ]);
nameToSymIdx.set(sym.name, symIdx); nameToSymIdx.set(sym.name, symIdx);
symIdx++; symIdx++;
@ -662,12 +824,6 @@ function lower(ast) {
entsize: 0, entsize: 0,
}); });
const alignTo = (align) => {
assertDefined(align);
const up = align - (out.buffer.length % align);
out.append(Array(up).fill(0));
};
const patch32 = (baseOffset, value) => { const patch32 = (baseOffset, value) => {
assertDefined(baseOffset, value); assertDefined(baseOffset, value);
const encoded = littleEndian32(value); const encoded = littleEndian32(value);
@ -677,14 +833,18 @@ function lower(ast) {
out.buffer[baseOffset + 3] = encoded[3]; out.buffer[baseOffset + 3] = encoded[3];
}; };
alignTo(16); alignTo(out, 16);
patch32(sectionOffsetRefs[".text"], out.currentPos); patch32(sectionOffsetRefs[".text"], out.currentPos);
out.append(textContent); out.append(textContent);
alignTo(out, 8);
patch32(sectionOffsetRefs[".rela"], out.currentPos);
out.append(rel.buffer);
patch32(sectionOffsetRefs[".strtab"], out.currentPos); patch32(sectionOffsetRefs[".strtab"], out.currentPos);
out.append(strs.out.buffer); out.append(strs.out.buffer);
alignTo(8); alignTo(out, 8);
patch32(sectionOffsetRefs[".symtab"], out.currentPos); patch32(sectionOffsetRefs[".symtab"], out.currentPos);
out.append(symtab.buffer); out.append(symtab.buffer);
@ -701,10 +861,11 @@ function lower(ast) {
funcs.push({ funcs.push({
name: func.name.ident, name: func.name.ident,
code: ib.out, code: ib.out,
relocations: ib.relocations,
}); });
} }
console.log(funcs); console.dir(funcs, { depth: 5 });
const obj = generateObjectFile(funcs); const obj = generateObjectFile(funcs);
@ -730,7 +891,7 @@ function link(object) {
} else { } else {
reject(new CompilerError("gcc failed to link", 0)); reject(new CompilerError("gcc failed to link", 0));
} }
}) });
}); });
} }
@ -758,8 +919,16 @@ try {
} }
} }
function assertDefined(...values) { function assert(condition) {
if (values.some((value) => value === undefined || value === null)) { if (!condition) {
throw new Error(`assertion failed, value undefined or null`); throw new Error("assertion failed");
} }
} }
/**
 * Verifies that none of the given arguments is `null` or `undefined`.
 *
 * Arguments are checked in order and the first offending position is
 * reported in the error message. Other falsy values (0, "", false, NaN)
 * are accepted.
 *
 * @param {...*} values - values to check.
 * @throws {Error} naming the zero-based index of the first null/undefined argument.
 */
function assertDefined(...values) {
  for (const [index, candidate] of values.entries()) {
    // Loose equality with null matches exactly null and undefined.
    if (candidate == null) {
      throw new Error(`assertion failed, argument ${index} undefined or null`);
    }
  }
}

View file

@ -2,5 +2,5 @@
int main(int argc) int main(int argc)
{ {
// exit(42); exit(42);
} }

View file

@ -1,3 +1,3 @@
{ pkgs ? import <nixpkgs> {} }: pkgs.mkShell { { pkgs ? import <nixpkgs> { } }: pkgs.mkShell {
nativeBuildInputs = with pkgs; [ nodejs_22 ]; nativeBuildInputs = with pkgs; [ nodejs_22 llvmPackages_18.lld ];
} }