jompiler/index.js
2024-11-22 21:46:24 +01:00

765 lines
17 KiB
JavaScript

import { spawn } from "node:child_process";
import fs from "node:fs/promises";
// Language reference: the C11 committee draft (WG14 document N1570).
// https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf
/**
 * Error describing a problem in the program being compiled.
 *
 * Besides the message it stores a `span`, which `render` interprets as a
 * character offset into the source text.
 */
class CompilerError extends Error {
  /**
   * @param {string} message - description of the problem.
   * @param {number} span - offset into the source the error refers to.
   * @throws {Error} when `message` or `span` is undefined or null.
   */
  constructor(message, span) {
    super(message);
    assertDefined(message, span);
    this.span = span;
  }

  /**
   * Formats the error as `error: <file>:<line>:<column>: <message>`.
   *
   * Line and column are 1-based; both are reported as -1 when the span lies
   * beyond the end of `source`.
   *
   * @param {string} sourceFileName - name shown in the diagnostic.
   * @param {string} source - full source text the span points into.
   * @returns {string} the rendered diagnostic line.
   */
  render(sourceFileName, source) {
    let lineNumber = -1;
    let colNumber = -1;
    // Offset of the first character of the line currently being examined.
    let lineStart = 0;
    for (const [index, text] of source.split("\n").entries()) {
      // +1 accounts for the newline that `split` removed.
      const nextLineStart = lineStart + text.length + 1;
      if (this.span < nextLineStart) {
        lineNumber = index + 1;
        colNumber = this.span - lineStart + 1;
        break;
      }
      lineStart = nextLineStart;
    }
    return `error: ${sourceFileName}:${lineNumber}:${colNumber}: ${this.message}`;
  }
}
/**
 * Splits source text into tokens.
 *
 * Produced token shapes (each with `span`, the offset of its first character):
 * - `{ kind: "integer", integer }` for runs of decimal digits,
 * - `{ kind: "ident", ident }` for runs of ASCII letters,
 * - `{ kind: "string", string }` for double-quoted text (no escape handling),
 * - `{ kind }` where kind is one of `( ) { } , ;`.
 *
 * Spaces, tabs, newlines, carriage returns and `//` line comments are skipped.
 *
 * @param {string} input - the source text.
 * @returns {object[]} the tokens in source order.
 * @throws {CompilerError} on an unexpected character or an unterminated
 *   string literal.
 */
function lex(input) {
  function alphabetic(char) {
    return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z");
  }
  function digit(char) {
    return char >= "0" && char <= "9";
  }

  const tokens = [];
  let i = 0;
  while (i < input.length) {
    const head = input[i];
    i++;

    if (head === " " || head === "\t" || head === "\n" || head === "\r") {
      continue;
    }

    if (head === "/" && input[i] === "/") {
      // Stop at end of input as well, so a comment on the last line without
      // a trailing newline terminates.
      while (i < input.length && input[i] !== "\n") {
        i++;
      }
      continue;
    }

    const span = i - 1;
    if (digit(head)) {
      let number = head;
      while (digit(input[i])) {
        number += input[i];
        i++;
      }
      tokens.push({
        kind: "integer",
        integer: Number(number),
        span,
      });
    } else if (alphabetic(head)) {
      let ident = head;
      while (alphabetic(input[i])) {
        ident += input[i];
        i++;
      }
      tokens.push({
        kind: "ident",
        ident,
        span,
      });
    } else if (head === '"') {
      let string = "";
      while (i < input.length && input[i] !== '"') {
        string += input[i];
        i++;
      }
      if (i >= input.length) {
        // Ran off the end without finding the closing quote.
        throw new CompilerError("unterminated string literal", span);
      }
      i++; // closing "
      tokens.push({
        kind: "string",
        string,
        span,
      });
    } else {
      switch (head) {
        case "(":
        case ")":
        case "{":
        case "}":
        case ",":
        case ";": {
          tokens.push({
            kind: head,
            span,
          });
          break;
        }
        default: {
          throw new CompilerError(`unexpected character '${head}'`, span);
        }
      }
    }
  }
  return tokens;
}
/**
 * Parses the token list produced by `lex` into a list of function
 * declarations.
 *
 * Each declaration has the shape
 * `{ kind: "function", ret, name, params, body }` where `ret` is a type node,
 * `name` is the identifier token, `params` is a list of `{ type, name }` and
 * `body` is a list of statements (`{ kind: "expr", expr, span }`).
 *
 * @param {object[]} tokens - tokens as returned by `lex`.
 * @returns {object[]} the parsed top-level declarations.
 * @throws {CompilerError} on any syntax error, including premature end of
 *   input.
 */
function parse(tokens) {
  // Cursor over `tokens` with one token of lookahead.
  const tokenIter = {
    tokens,
    nextIndex: 0,
    hasNext() {
      return this.nextIndex < this.tokens.length;
    },
    // Returns the next token without consuming it, or undefined at the end.
    peek() {
      if (!this.hasNext()) {
        return undefined;
      }
      return this.tokens[this.nextIndex];
    },
    // Consumes and returns the next token; `description` names what was being
    // parsed and is used in the end-of-file diagnostic.
    next(description) {
      if (!this.hasNext()) {
        // CompilerError requires a span, so point at the last token we saw
        // (or the start of the file when there are no tokens at all).
        const last = this.tokens[this.tokens.length - 1];
        throw new CompilerError(
          `unexpected end of file while parsing ${description}`,
          last?.span ?? 0
        );
      }
      return this.tokens[this.nextIndex++];
    },
    // Consumes the next token and checks that it has the given kind.
    expect(kind, description) {
      const tok = this.next(description);
      if (tok.kind !== kind) {
        throw new CompilerError(
          `expected '${kind}' for ${description}, found ${tok.kind}`,
          tok.span
        );
      }
      return tok;
    },
  };

  // Parses a type name. Only `int` is known so far.
  function parseType(tok, description) {
    const first = tok.next(`type for ${description}`);
    if (first.kind !== "ident") {
      throw new CompilerError(
        `unexpected token ${first.kind} while parsing type for ${description}`,
        first.span
      );
    }
    switch (first.ident) {
      case "int": {
        return {
          kind: "int",
          span: first.span,
        };
      }
      default: {
        // Reject unknown names instead of silently returning undefined.
        throw new CompilerError(
          `unknown type '${first.ident}' for ${description}`,
          first.span
        );
      }
    }
  }

  function parseExpr(tok) {
    // Builds a parser for one binary precedence level: operands come from
    // `higher`, operators are the token kinds in `ops`. The right-hand side
    // recurses into the same level, so chains nest to the right.
    function generateBinaryParser(ops, higher) {
      return function parseBinaryExpr(tok) {
        const lhs = higher(tok);
        if (ops.includes(tok.peek()?.kind)) {
          const op = tok.next("binary operator");
          const rhs = parseBinaryExpr(tok);
          return {
            kind: op.kind,
            span: op.span,
            lhs,
            rhs,
          };
        }
        return lhs;
      };
    }

    function parsePrimary(tok) {
      const head = tok.next("expression");
      switch (head.kind) {
        case "string": {
          return {
            kind: "string",
            string: head.string,
            span: head.span,
          };
        }
        case "ident": {
          return {
            kind: "ident",
            string: head.ident,
            span: head.span,
          };
        }
        case "integer": {
          return {
            kind: "integer",
            integer: head.integer,
            span: head.span,
          };
        }
        default: {
          throw new CompilerError(
            `unexpected token ${head.kind} in expression`,
            head.span
          );
        }
      }
    }

    function parsePostfix(tok) {
      let lhs = parsePrimary(tok);
      while (tok.peek()?.kind === "(") {
        const popen = tok.next("open parenthesis of call");
        const args = [];
        while (tok.peek()?.kind !== ")") {
          if (args.length > 0) {
            tok.expect(",", "comma between arguments");
          }
          // Each argument is an assignment-level expression; the full
          // comma-expression parser would swallow the argument separators.
          args.push(parseAssignment(tok));
        }
        tok.expect(")", "closing parenthesis of call");
        lhs = {
          kind: "call",
          lhs,
          args,
          span: popen.span,
        };
      }
      return lhs;
    }

    const parseLogicalAnd = generateBinaryParser(["&&"], parsePostfix);
    const parseLogicalOr = generateBinaryParser(["||"], parseLogicalAnd);
    const parseAssignment = generateBinaryParser(
      ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="],
      parseLogicalOr
    );
    const parseComma = generateBinaryParser([","], parseAssignment);
    return parseComma(tok);
  }

  // Parses `{ expr; expr; ... }` and returns the list of statements.
  function parseBlock(tok) {
    tok.expect("{", "start of block");
    const statements = [];
    while (tok.peek()?.kind !== "}") {
      // TODO: non-expression statements
      const expr = parseExpr(tok);
      statements.push({
        kind: "expr",
        expr,
        span: expr.span,
      });
      tok.expect(";", "end of statement");
    }
    tok.expect("}", "end of block");
    return statements;
  }

  // Parses `<type> <name>(<type> <name>, ...) { ... }`.
  function parseDeclaration(tok) {
    const ret = parseType(tok, "function return");
    const name = tok.expect("ident", "function name");
    tok.expect("(", "function parentheses");
    const params = [];
    while (tok.peek()?.kind !== ")") {
      if (params.length > 0) {
        tok.expect(",", "comma between function parameters");
      }
      const type = parseType(tok, "function parameter");
      const paramName = tok.next("function parameter name");
      if (paramName.kind !== "ident") {
        throw new CompilerError(
          `expected identifier for function parameter, found ${paramName.kind}`,
          paramName.span
        );
      }
      params.push({
        type,
        name: paramName.ident,
      });
    }
    tok.expect(")", "function parentheses");
    const body = parseBlock(tok);
    return {
      kind: "function",
      ret,
      name,
      params,
      body,
    };
  }

  const decls = [];
  while (tokenIter.hasNext()) {
    decls.push(parseDeclaration(tokenIter));
  }
  return decls;
}
/**
 * Lowers the parsed program to a relocatable x86-64 ELF object file.
 *
 * Machine code is generated per function and then wrapped in an ELF64 object
 * with `.text`, `.symtab`, `.strtab` and `.shstrtab` sections. Exactly one
 * function is supported at the moment.
 *
 * @param {object[]} ast - declarations as returned by `parse`.
 * @returns {Buffer} the bytes of the object file.
 * @throws {Error} on unsupported statements/expressions or when the program
 *   does not consist of exactly one function.
 */
function lower(ast) {
  /*
  x86-64 codegen.
  strategy: every expression returns its result in rax.
  */

  // Encodes the low 16 bits of `number` as little-endian bytes.
  function littleEndian16(number) {
    assertDefined(number);
    return [number & 0xff, (number >> 8) & 0xff];
  }

  // Encodes the low 32 bits of `number` as little-endian bytes.
  function littleEndian32(number) {
    assertDefined(number);
    return [
      number & 0xff,
      (number >> 8) & 0xff,
      (number >> 16) & 0xff,
      (number >> 24) & 0xff,
    ];
  }

  // Field values for the ModRM byte built by `modRm`.
  const MOD_REG = 0b11;
  const RM_A = 0b000;
  const RM_C = 0b001;
  const REG_A = RM_A;
  const REG_C = RM_C;
  const REG_IGNORED = 0;

  // Packs the three ModRM fields: mod in bits 7-6, reg in bits 5-3, rm in 2-0.
  function modRm(mod, rm, reg) {
    return (mod << 6) | rm | (reg << 3);
  }

  // Accumulates the machine code of one function in `out`.
  class InstBuilder {
    #stackSize;

    constructor() {
      this.out = new Uint8Array();
      this.#stackSize = 0;
    }

    // Grows the tracked stack size by `size` and returns the new total.
    reserveStack(size) {
      this.#stackSize += size;
      return this.#stackSize;
    }

    movEaxImm32(imm) {
      // mov eax, imm
      this.#append([
        0xc7,
        modRm(MOD_REG, RM_A, REG_IGNORED),
        ...littleEndian32(imm),
      ]);
    }

    ret() {
      // ret ; near return to calling procedure
      this.#append([0xc3]);
    }

    #append(code) {
      this.out = Buffer.concat([this.out, new Uint8Array(code)]);
    }
  }

  // Emits code for `expr` into `ib`.
  function codegenExpr(ib, expr) {
    switch (expr.kind) {
      case "call": {
        if (expr.lhs.kind !== "ident") {
          throw new Error("bad");
        }
        if (expr.args.length !== 1) {
          throw new Error("bad");
        }
        // TODO: only the argument is evaluated so far; no call instruction is
        // emitted yet.
        codegenExpr(ib, expr.args[0]);
        break;
      }
      case "integer": {
        ib.movEaxImm32(expr.integer);
        break;
      }
      default: {
        throw new Error(`unsupported expr: ${expr.kind}`);
      }
    }
  }

  // Emits code for every statement of `func`, followed by `mov eax, 0; ret`.
  function codegenFunction(func) {
    const ib = new InstBuilder();
    for (const stmt of func.body) {
      switch (stmt.kind) {
        case "expr": {
          codegenExpr(ib, stmt.expr);
          break;
        }
        default: {
          throw new Error(`unsupported stmt: ${stmt.kind}`);
        }
      }
    }
    ib.movEaxImm32(0);
    ib.ret();
    return ib;
  }

  // Append-only byte buffer.
  class BufferBuilder {
    constructor() {
      this.buffer = new Uint8Array();
    }

    append(array) {
      assertDefined(array);
      this.buffer = Buffer.concat([this.buffer, new Uint8Array(array)]);
    }

    get currentPos() {
      return this.buffer.length;
    }
  }

  // Wraps the generated code in an ELF64 relocatable object.
  // Layout: ELF header, section headers, then the section contents.
  function generateObjectFile(funcs) {
    if (funcs.length !== 1) {
      throw new Error("bad");
    }
    const textContent = funcs[0].code;
    const textRelativeSymbols = [
      {
        name: funcs[0].name,
        offset: 0,
        size: funcs[0].code.length,
      },
    ];
    const out = new BufferBuilder();

    // Helpers for back-patching fields whose value is only known later.
    const patch16 = (baseOffset, value) => {
      assertDefined(baseOffset, value);
      const encoded = littleEndian16(value);
      out.buffer[baseOffset] = encoded[0];
      out.buffer[baseOffset + 1] = encoded[1];
    };
    const patch32 = (baseOffset, value) => {
      assertDefined(baseOffset, value);
      const encoded = littleEndian32(value);
      out.buffer[baseOffset] = encoded[0];
      out.buffer[baseOffset + 1] = encoded[1];
      out.buffer[baseOffset + 2] = encoded[2];
      out.buffer[baseOffset + 3] = encoded[3];
    };
    // Pads with zero bytes up to the next multiple of `align`; appends
    // nothing when the position is already aligned.
    const alignTo = (align) => {
      assertDefined(align);
      const padding = (align - (out.currentPos % align)) % align;
      out.append(Array(padding).fill(0));
    };

    // ident
    out.append([0x7f, "E".charCodeAt(0), "L".charCodeAt(0), "F".charCodeAt(0)]);
    out.append([
      /*ELFCLASS64*/ 2, /*ELFDATA2LSB*/ 1, /*EV_CURRENT*/ 1,
      /*ELFOSABI_SYSV*/ 0, /*EI_ABIVERSION*/ 0, /*EI_PAD*/ 0, 0, 0, 0, 0, 0, 0,
    ]);
    const sectionOffsetRefs = {};
    // type
    out.append([/*ET_REL*/ 1, 0]);
    // machine
    out.append([/*EM_X86_64*/ 62, 0]);
    // version
    out.append([/*EV_CURRENT*/ 1, 0, 0, 0]);
    // entry
    out.append([0, 0, 0, 0, 0, 0, 0, 0]);
    // phoff
    out.append([0, 0, 0, 0, 0, 0, 0, 0]); // no ph
    // shoff
    const shoffRef = out.currentPos;
    out.append([0, 0, 0, 0, 0, 0, 0, 0]);
    // flags
    out.append([0, 0, 0, 0]);
    // ehsize
    out.append([64, 0]);
    // phentsize
    out.append([0, 0]); // no ph
    // phnum
    out.append([0, 0]);
    // shentsize
    out.append([64, 0]);
    // shnum
    const shnumRef = out.currentPos;
    out.append([0, 0]);
    // shstrndx
    const shstrndxRef = out.currentPos;
    out.append([0, 0]);

    // Let's write some section headers. They start right after the ELF header.
    patch32(shoffRef, out.currentPos);

    // String table builder: NUL-terminated strings, deduplicated by content.
    class NullTerminatedStringStore {
      #offsets;

      constructor() {
        this.#offsets = new Map();
        this.out = new BufferBuilder();
      }

      // Returns the offset of `str` in the table, adding it if necessary.
      pushAndGet(str) {
        if (this.#offsets.has(str)) {
          return this.#offsets.get(str);
        }
        const offset = this.out.buffer.length;
        this.#offsets.set(str, offset);
        this.out.append(new TextEncoder().encode(str));
        this.out.append([0]);
        return offset;
      }
    }

    const shstrs = new NullTerminatedStringStore();
    shstrs.pushAndGet("");
    shstrs.pushAndGet(".shstrtab"); // ensure that this is already present so it doesn't get added afterwards when we already got the length
    const strs = new NullTerminatedStringStore();
    strs.pushAndGet("");

    let sectionCount = 0;
    // Writes one 64-byte section header. The file offset field is remembered
    // in `sectionOffsetRefs[name]` so it can be patched once the section
    // contents have been placed.
    const writeSectionHeader = (name, sh) => {
      sectionCount++;
      const nameIndex = shstrs.pushAndGet(name);
      out.append([
        ...littleEndian32(nameIndex),
        ...littleEndian32(sh.type),
        ...littleEndian32(sh.flags),
        ...[0, 0, 0, 0], // flag pad
        ...littleEndian32(sh.addr),
        ...[0, 0, 0, 0],
      ]);
      sectionOffsetRefs[name] = out.currentPos;
      out.append([
        ...littleEndian32(sh.offset),
        ...[0, 0, 0, 0],
        ...littleEndian32(sh.size),
        ...[0, 0, 0, 0],
        ...littleEndian32(sh.link),
        ...littleEndian32(sh.info),
        ...littleEndian32(sh.addralign),
        ...[0, 0, 0, 0],
        ...littleEndian32(sh.entsize),
        ...[0, 0, 0, 0],
      ]);
    };

    // null section
    writeSectionHeader("", {
      type: 0,
      flags: 0,
      addr: 0,
      offset: 0,
      size: 0,
      link: 0,
      info: 0,
      addralign: 0,
      entsize: 0,
    });
    // text section
    writeSectionHeader(".text", {
      type: /*SHT_PROGBITS*/ 1,
      flags: /*SHF_ALLOC*/ (1 << 1) | /*SHF_EXECINSTR*/ (1 << 2),
      addr: 0,
      offset: 0,
      size: textContent.length,
      link: 0,
      info: 0,
      addralign: 16,
      entsize: 0,
    });

    const SYMBOL_ENTRY_SIZE = 24;
    const symtab = new BufferBuilder();
    // Symbol index 0 is reserved by ELF for the undefined symbol and must be
    // an all-zero entry; real symbols start at index 1.
    symtab.append(Array(SYMBOL_ENTRY_SIZE).fill(0));
    // Only the null symbol is local; every function symbol below is global.
    const localSymbolCount = 1;
    const nameToSymIdx = new Map();
    let symIdx = localSymbolCount;
    for (const sym of textRelativeSymbols) {
      const nameIdx = strs.pushAndGet(sym.name);
      symtab.append([
        ...littleEndian32(nameIdx),
        /*STT_FUNC*/ 2 | /*STB_GLOBAL*/ (1 << 4),
        /*STV_DEFAULT*/ 0,
        /*shndx .text*/ ...littleEndian16(1),
        /*value*/ ...littleEndian32(sym.offset),
        ...[0, 0, 0, 0],
        /*size*/ ...littleEndian32(sym.size),
        ...[0, 0, 0, 0],
      ]);
      nameToSymIdx.set(sym.name, symIdx);
      symIdx++;
    }

    // symtab section
    // .strtab is written immediately after .symtab, so its index is the
    // current count plus one.
    const strTableIndex = sectionCount + 1;
    writeSectionHeader(".symtab", {
      type: /*SHT_SYMTAB*/ 2,
      flags: 0,
      addr: 0,
      offset: 0,
      size: symtab.buffer.length,
      link: strTableIndex,
      // sh_info of a symbol table is the index of the first non-local symbol.
      info: localSymbolCount,
      addralign: 8,
      entsize: SYMBOL_ENTRY_SIZE,
    });
    // strtab section
    writeSectionHeader(".strtab", {
      type: /*SHT_STRTAB*/ 3,
      flags: 0,
      addr: 0,
      offset: 0,
      size: strs.out.buffer.length,
      link: 0,
      info: 0,
      addralign: 1,
      entsize: 0,
    });

    // .shstrtab is the next (and last) header, so its index equals the number
    // of headers written so far and the total is one more than that.
    patch16(shstrndxRef, sectionCount);
    patch16(shnumRef, sectionCount + 1);
    // shstrtab section
    writeSectionHeader(".shstrtab", {
      type: /*SHT_STRTAB*/ 3,
      flags: 0,
      addr: 0,
      offset: 0,
      size: shstrs.out.buffer.length,
      link: 0,
      info: 0,
      addralign: 1,
      entsize: 0,
    });

    // Section contents, each patched into its header's offset field.
    alignTo(16);
    patch32(sectionOffsetRefs[".text"], out.currentPos);
    out.append(textContent);

    patch32(sectionOffsetRefs[".strtab"], out.currentPos);
    out.append(strs.out.buffer);

    alignTo(8);
    patch32(sectionOffsetRefs[".symtab"], out.currentPos);
    out.append(symtab.buffer);

    patch32(sectionOffsetRefs[".shstrtab"], out.currentPos);
    out.append(shstrs.out.buffer);

    return out.buffer;
  }

  const funcs = [];
  for (const func of ast) {
    const ib = codegenFunction(func);
    funcs.push({
      name: func.name.ident,
      code: ib.out,
    });
  }
  console.log(funcs);
  return generateObjectFile(funcs);
}
/**
 * Writes the object file to disk and links it into an executable with gcc.
 *
 * gcc's stdout and stderr are forwarded to this process.
 *
 * @param {Uint8Array} object - bytes of the object file.
 * @returns {Promise<void>} resolves once gcc exits successfully.
 * @throws {CompilerError} (as a rejection) when gcc exits with a non-zero code.
 * @throws {Error} (as a rejection) when the object file cannot be written or
 *   gcc cannot be started.
 */
async function link(object) {
  // we could use a temporary directory in the future, but let's keep this debuggable for now
  const outputFile = "output.o";
  // The file must be fully written before gcc starts reading it.
  await fs.writeFile(outputFile, object);
  await new Promise((resolve, reject) => {
    const gcc = spawn("gcc", [outputFile]);
    gcc.stdout.on("data", (data) => {
      process.stdout.write(data);
    });
    gcc.stderr.on("data", (data) => {
      process.stderr.write(data);
    });
    // Emitted when the process cannot be spawned (e.g. gcc is not installed);
    // without a listener this would surface as an uncaught exception.
    gcc.on("error", reject);
    gcc.on("close", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new CompilerError("gcc failed to link", 0));
      }
    });
  });
}
/**
 * Runs the whole pipeline on one source text: lex, parse, lower, link.
 *
 * The token list and the AST are dumped to stdout along the way.
 *
 * @param {string} input - the source text to compile.
 * @returns {Promise} the result of `link` for the generated object file.
 */
async function compile(input) {
  const tokenList = lex(input);
  console.log(tokenList);

  const program = parse(tokenList);
  console.dir(program, { depth: 20 });

  return link(lower(program));
}
// Command-line driver: compiles the source file named by the first argument.
const fileName = process.argv[2];
if (fileName === undefined) {
  // Without this check fs.readFile fails with an opaque argument-type error.
  console.error("error: no input file given");
  process.exit(1);
}
const input = await fs.readFile(fileName, "utf-8");
console.log(input);
try {
  await compile(input);
} catch (e) {
  if (e instanceof CompilerError) {
    console.error(e.render(fileName, input));
    // Signal the failure to the caller; otherwise the process exits with 0.
    process.exitCode = 1;
  } else {
    throw e;
  }
}
/**
 * Internal sanity check: throws when any argument is `undefined` or `null`.
 *
 * @param {...*} values - values that must all be defined.
 * @throws {Error} if at least one value is undefined or null.
 */
function assertDefined(...values) {
  for (const candidate of values) {
    // `== null` matches exactly undefined and null.
    if (candidate == null) {
      throw new Error(`assertion failed, value undefined or null`);
    }
  }
}