// jompiler - a small work-in-progress C compiler: a lexer, a parser and the
// beginnings of an x86-64 code generator.
//
// Language reference: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf
//
// Provenance: this is the `index.js` added by the "init" patch (b2ed872). The
// same patch also added .envrc, input.c, package.json and shell.nix.

import fs from "node:fs/promises";

/**
 * A user-facing diagnostic tied to a position in the source text.
 *
 * `span` is the zero-based character offset the diagnostic points at.
 */
class CompilerError extends Error {
  /**
   * @param {string} message - human readable description of the problem
   * @param {number} span - zero-based character offset into the source
   * @throws {Error} if `span` is missing or not a non-negative integer
   */
  constructor(message, span) {
    super(message);
    // Offset 0 is a perfectly valid span, so check the type rather than
    // truthiness (`!span` would reject an error at the first character).
    if (!Number.isInteger(span) || span < 0) {
      throw new Error("span must be present");
    }
    this.name = "CompilerError";
    this.span = span;
  }

  /**
   * Formats the diagnostic as `error: <file>:<line>:<col>: <message>`.
   *
   * Line and column are one-based. Both are -1 if the span lies beyond the
   * end of `source`.
   *
   * @param {string} sourceFileName - name shown in the message
   * @param {string} source - full source text the span refers to
   * @returns {string} the rendered diagnostic
   */
  render(sourceFileName, source) {
    let lineNumber = -1;
    let colNumber = -1;
    let lineStart = 0;
    for (const [index, line] of source.split("\n").entries()) {
      // +1 accounts for the "\n" that split() removed.
      const nextLineStart = lineStart + line.length + 1;
      if (nextLineStart > this.span) {
        lineNumber = index + 1;
        colNumber = this.span - lineStart + 1;
        break;
      }
      lineStart = nextLineStart;
    }

    return `error: ${sourceFileName}:${lineNumber}:${colNumber}: ${this.message}`;
  }
}

/**
 * Splits C source text into tokens.
 *
 * Token shapes (every token carries `span`, the offset of its first char):
 *   { kind: "integer", integer }  decimal integer literal
 *   { kind: "ident", ident }      identifier or keyword
 *   { kind: "string", string }    string literal (no escape processing)
 *   { kind: "(" | ")" | "{" | "}" | "," | ";" }
 *
 * Whitespace and `//` line comments are skipped.
 * TODO: operator tokens (`=`, `&&`, ...) that the parser already understands.
 *
 * @param {string} input - source text
 * @returns {object[]} the token list
 * @throws {CompilerError} on an unexpected character or unterminated string
 */
function lex(input) {
  const PUNCTUATION = new Set(["(", ")", "{", "}", ",", ";"]);

  const isDigit = (char) => char >= "0" && char <= "9";
  const isIdentStart = (char) =>
    (char >= "a" && char <= "z") || (char >= "A" && char <= "Z") || char === "_";
  const isIdentPart = (char) => isIdentStart(char) || isDigit(char);

  // Returns the index of the first char at or after `from` failing `pred`.
  function scanWhile(from, pred) {
    let end = from;
    while (end < input.length && pred(input[end])) {
      end++;
    }
    return end;
  }

  const tokens = [];
  let i = 0;
  while (i < input.length) {
    const span = i;
    const head = input[i];
    i++;

    if (head === " " || head === "\t" || head === "\n" || head === "\r") {
      continue;
    }
    if (head === "/" && input[i] === "/") {
      // A comment on the last line may end at EOF rather than at "\n".
      i = scanWhile(i, (char) => char !== "\n");
      continue;
    }

    if (isDigit(head)) {
      i = scanWhile(i, isDigit);
      tokens.push({
        kind: "integer",
        integer: Number(input.slice(span, i)),
        span,
      });
    } else if (isIdentStart(head)) {
      i = scanWhile(i, isIdentPart);
      tokens.push({
        kind: "ident",
        ident: input.slice(span, i),
        span,
      });
    } else if (head === '"') {
      const close = input.indexOf('"', i);
      if (close === -1) {
        throw new CompilerError("unterminated string literal", span);
      }
      tokens.push({
        kind: "string",
        string: input.slice(i, close),
        span,
      });
      i = close + 1; // skip the closing quote
    } else if (PUNCTUATION.has(head)) {
      tokens.push({
        kind: head,
        span,
      });
    } else {
      throw new CompilerError(`unexpected character '${head}'`, span);
    }
  }
  return tokens;
}

/**
 * Parses a token list into a list of function declarations.
 *
 * Each declaration is
 *   { kind: "function", ret, name, params, body }
 * where `ret` is a type node, `name` is the identifier token, `params` is a
 * list of `{ type, name }` and `body` is a list of statement nodes.
 *
 * @param {object[]} tokens - output of `lex`
 * @returns {object[]} the parsed declarations
 * @throws {CompilerError} on any syntax error
 */
function parse(tokens) {
  const tokenIter = {
    tokens,
    nextIndex: 0,
    hasNext() {
      return this.nextIndex < this.tokens.length;
    },
    peek() {
      if (!this.hasNext()) {
        return undefined;
      }
      return this.tokens[this.nextIndex];
    },
    next(description) {
      if (!this.hasNext()) {
        // Point at the last token we have; an empty file points at offset 0.
        const span = this.tokens.at(-1)?.span ?? 0;
        throw new CompilerError(
          `unexpected end of file while parsing ${description}`,
          span
        );
      }

      return this.tokens[this.nextIndex++];
    },
    expect(kind, description) {
      const tok = this.next(description);
      if (tok.kind !== kind) {
        throw new CompilerError(
          `expected '${kind}' for ${description}, found ${tok.kind}`,
          tok.span
        );
      }
      return tok;
    },
  };

  function parseType(tok, description) {
    const first = tok.next(`type for ${description}`);
    if (first.kind !== "ident") {
      throw new CompilerError(
        `unexpected token ${first.kind} while parsing type for ${description}`,
        first.span
      );
    }
    switch (first.ident) {
      case "int": {
        return {
          kind: "int",
          span: first.span,
        };
      }
      default: {
        // Never hand back `undefined` as a type.
        throw new CompilerError(
          `unknown type '${first.ident}' for ${description}`,
          first.span
        );
      }
    }
  }

  function parseExpr(tok) {
    // Builds a parser for one precedence level. `higher` parses the operands.
    // The right-hand side recurses into the same level, so every operator
    // built this way nests to the right.
    function generateBinaryParser(ops, higher) {
      return function parseBinaryExpr(tok) {
        const lhs = higher(tok);
        if (ops.includes(tok.peek()?.kind)) {
          const op = tok.next("binary operator");
          const rhs = parseBinaryExpr(tok);

          return {
            kind: op.kind,
            span: op.span,
            lhs,
            rhs,
          };
        }

        return lhs;
      };
    }

    function parsePrimary(tok) {
      const head = tok.next("expression");
      switch (head.kind) {
        case "string": {
          return {
            kind: "string",
            string: head.string,
            span: head.span,
          };
        }
        case "ident": {
          return {
            kind: "ident",
            string: head.ident,
            span: head.span,
          };
        }
        case "integer": {
          return {
            kind: "integer",
            integer: head.integer,
            span: head.span,
          };
        }
        default: {
          throw new CompilerError(
            `unexpected token ${head.kind} in expression`,
            head.span
          );
        }
      }
    }

    function parsePostfix(tok) {
      let lhs = parsePrimary(tok);

      while (tok.peek()?.kind === "(") {
        const popen = tok.next("open parenthesis of call");

        const args = [];
        while (tok.peek()?.kind !== ")") {
          if (args.length > 0) {
            tok.expect(",", "comma between arguments");
          }
          // Arguments sit one level below the comma operator; otherwise
          // `f(a, b)` would parse as a single comma expression.
          args.push(parseAssignment(tok));
        }
        tok.expect(")", "closing parenthesis of call");

        lhs = {
          kind: "call",
          lhs,
          args,
          span: popen.span,
        };
      }

      return lhs;
    }

    // No holes allowed in this list: `includes(undefined)` matches an empty
    // slot, which would make end-of-input look like an operator.
    const ASSIGNMENT_OPS = [
      "=",
      "*=",
      "/=",
      "%=",
      "+=",
      "-=",
      "<<=",
      ">>=",
      "&=",
      "^=",
      "|=",
    ];

    const parseLogicalAnd = generateBinaryParser(["&&"], parsePostfix);
    const parseLogicalOr = generateBinaryParser(["||"], parseLogicalAnd);
    const parseAssignment = generateBinaryParser(ASSIGNMENT_OPS, parseLogicalOr);
    const parseComma = generateBinaryParser([","], parseAssignment);

    return parseComma(tok);
  }

  function parseBlock(tok) {
    tok.expect("{", "start of block");

    const statements = [];

    while (tok.peek()?.kind !== "}") {
      // TODO: non-expression statements
      const expr = parseExpr(tok);
      statements.push({
        kind: "expr",
        expr,
        span: expr.span,
      });
      tok.expect(";", "end of statement");
    }

    tok.expect("}", "end of block");

    return statements;
  }

  function parseDeclaration(tok) {
    const ret = parseType(tok, "function return");
    const name = tok.expect("ident", "function name");

    tok.expect("(", "function parentheses");

    const params = [];
    while (tok.peek()?.kind !== ")") {
      if (params.length > 0) {
        tok.expect(",", "comma between function parameters");
      }
      const type = parseType(tok, "function parameter");
      const paramName = tok.next("function parameter name");
      if (paramName.kind !== "ident") {
        throw new CompilerError(
          `expected identifier for function parameter, found ${paramName.kind}`,
          paramName.span
        );
      }
      params.push({
        type,
        name: paramName.ident,
      });
    }

    tok.expect(")", "function parentheses");

    const body = parseBlock(tok);

    return {
      kind: "function",
      ret,
      name,
      params,
      body,
    };
  }

  const decls = [];
  while (tokenIter.hasNext()) {
    decls.push(parseDeclaration(tokenIter));
  }

  return decls;
}

/**
 * Generates x86-64 machine code for every function in `ast`.
 *
 * Strategy: every expression returns its result in rax.
 *
 * @param {object[]} ast - output of `parse`
 * @returns {{ name: string, code: Uint8Array }[]} one entry per function, in
 *   source order, holding the bytes emitted for its body
 * @throws {CompilerError} for constructs the code generator cannot handle yet
 */
function lower(ast) {
  // Splits the low 32 bits of `number` into four bytes, least significant first.
  function littleEndian32(number) {
    return [
      number & 0xff,
      (number >>> 8) & 0xff,
      (number >>> 16) & 0xff,
      (number >>> 24) & 0xff,
    ];
  }

  const MOD_REG = 0b11;

  const RM_A = 0b000;
  const RM_C = 0b001;

  const REG_A = RM_A;
  const REG_C = RM_C;

  const REG_IGNORED = 0;

  // ModR/M byte layout: mod in bits 7-6, reg in bits 5-3, rm in bits 2-0.
  function modRm(mod, rm, reg) {
    return (mod << 6) | (reg << 3) | rm;
  }

  class InstBuilder {
    #out;
    #stackSize;
    constructor() {
      // A plain array grows cheaply; it becomes a Uint8Array in finish().
      this.#out = [];
      this.#stackSize = 0;
    }

    reserveStack(size) {
      this.#stackSize += size;
      return this.#stackSize;
    }

    movEaxImm32(imm) {
      // mov eax, imm  (C7 /0 id)
      this.#append([
        0xc7,
        modRm(MOD_REG, RM_A, REG_IGNORED),
        ...littleEndian32(imm),
      ]);
    }

    // Returns a copy of the bytes emitted so far.
    finish() {
      return Uint8Array.from(this.#out);
    }

    #append(code) {
      this.#out.push(...code);
    }
  }

  function codegenExpr(ib, expr) {
    switch (expr.kind) {
      case "call": {
        if (expr.lhs.kind !== "ident") {
          throw new CompilerError(
            "call target must be an identifier",
            expr.lhs.span
          );
        }
        if (expr.args.length !== 1) {
          throw new CompilerError(
            "only calls with exactly one argument are supported",
            expr.span
          );
        }

        // Evaluates the argument into eax.
        // TODO: move it into the argument register and emit the call itself.
        codegenExpr(ib, expr.args[0]);
        break;
      }
      case "integer": {
        ib.movEaxImm32(expr.integer);
        break;
      }
      default: {
        throw new CompilerError(`unsupported expr: ${expr.kind}`, expr.span);
      }
    }
  }

  function codegenFunction(func) {
    const ib = new InstBuilder();

    for (const stmt of func.body) {
      console.log("doing cg for", stmt.kind);

      switch (stmt.kind) {
        case "expr": {
          codegenExpr(ib, stmt.expr);
          break;
        }
        default: {
          // The parser only produces "expr" statements, so this is a bug in
          // the compiler rather than in the user's program.
          throw new Error(`unsupported stmt: ${stmt.kind}`);
        }
      }
    }

    return ib;
  }

  return ast.map((func) => ({
    name: func.name.ident,
    code: codegenFunction(func).finish(),
  }));
}

/**
 * Runs the whole pipeline (lex, parse, lower) on `input`, logging the tokens
 * and the AST along the way.
 *
 * @param {string} input - C source text
 * @returns {{ name: string, code: Uint8Array }[]} generated code per function
 * @throws {CompilerError} if the input cannot be compiled
 */
function compile(input) {
  const tokens = lex(input);
  console.log(tokens);
  const ast = parse(tokens);
  console.dir(ast, { depth: 20 });
  return lower(ast);
}

// Command-line entry point: `node index.js <file.c>`.
const fileName = process.argv[2];
if (fileName === undefined) {
  console.error("usage: node index.js <file.c>");
  process.exitCode = 1;
} else {
  const input = await fs.readFile(fileName, "utf-8");
  console.log(input);

  try {
    compile(input);
  } catch (e) {
    if (!(e instanceof CompilerError)) {
      throw e;
    }
    console.error(e.render(fileName, input));
    // A failed compilation must not look like success to the caller.
    process.exitCode = 1;
  }
}