From 7cd50ab5549523ad474cea77e6d6c012366cebbf Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sat, 29 Jul 2023 23:37:01 +0200 Subject: [PATCH] fix many things --- src/ast.ts | 51 ++++++++++++++-- src/index.ts | 12 +++- src/lexer.ts | 4 ++ src/lower.ts | 161 ++++++++++++++++++++++++++++++++++++++----------- src/parser.ts | 26 ++++++-- src/printer.ts | 12 ++++ src/typeck.ts | 54 ++++++++++++++++- 7 files changed, 269 insertions(+), 51 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 5407a9d..92f636e 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -104,6 +104,20 @@ export type ExprIf = { else?: Expr; }; +export type ExprLoop = { + kind: "loop"; + body: Expr; +}; + +export type ExprBreak = { + kind: "break"; + /** + * The break target block. + * May be any control flow block, labelled from inside out. + */ + target?: number, +}; + export type ExprStructLiteral = { kind: "structLiteral"; name: Identifier; @@ -120,6 +134,8 @@ export type ExprKind = | ExprUnary | ExprCall | ExprIf + | ExprLoop + | ExprBreak | ExprStructLiteral; export type Expr = ExprKind & { @@ -202,7 +218,8 @@ export type TypeKind = | { kind: "tuple"; elems: Type[]; - }; + } + | { kind: "never" }; export type Type = TypeKind & { span: Span; @@ -217,9 +234,9 @@ export type Resolution = /** * The index of the local variable, from inside out. * ``` - * let a = 0; let b; (a, b); - * ^ ^ - * 1 0 + * let a; let b; (a, b); + * ^ ^ + * 1 0 * ``` * When traversing resolutions, a stack of locals has to be kept. * It's similar to a De Bruijn index. @@ -304,6 +321,10 @@ export type TyStruct = { fields: [string, Ty][]; }; +export type TyNever = { + kind: "never"; +}; + export type Ty = | TyString | TyInt @@ -312,7 +333,8 @@ export type Ty = | TyTuple | TyFn | TyVar - | TyStruct; + | TyStruct + | TyNever; export function tyIsUnit(ty: Ty): ty is TyUnit { return ty.kind === "tuple" && ty.elems.length === 0; @@ -322,9 +344,10 @@ export const TY_UNIT: Ty = { kind: "tuple", elems: [] }; export const TY_STRING: Ty = { kind: "string" }; export const TY_BOOL: Ty = { kind: "bool" }; export const TY_INT: Ty = { kind: "int", signed: false }; +export const TY_NEVER: Ty = { kind: "never" }; export type TypeckResults = { - main: ItemId; + main: Resolution; }; // folders @@ -459,6 +482,19 @@ export function superFoldExpr(expr: Expr, folder: Folder): Expr { else: expr.else && folder.expr(expr.else), }; } + case "loop": { + return { + ...expr, + kind: "loop", + body: folder.expr(expr.body), + }; + } + case "break": { + return { + ...expr, + kind: "break", + }; + } case "structLiteral": { return { ...expr, @@ -494,6 +530,9 @@ export function superFoldType(type: Type, folder: Folder): Type { span, }; } + case "never": { + return { ...type, kind: "never" }; + } } } diff --git a/src/index.ts b/src/index.ts index 9accbf7..886a7d3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,8 +10,13 @@ import fs from "fs"; import { exec } from "child_process"; const input = ` -function main() = ( - 1 + 2 * 3; +function main() = uwu(10); + +function a() = ; + +function uwu(a: Int) = if a != 0 then ( + print("uwu\n"); + uwu(a - 1); ); `; @@ -39,7 +44,8 @@ function main() { console.log("-----AST typecked------"); const typecked = typeck(resolved); - console.dir(typecked, { depth: 8 }); + const typeckPrinted = printAst(typecked); + console.log(typeckPrinted); console.log("-----wasm--------------"); const wasmModule = lowerToWasm(typecked); diff --git a/src/lexer.ts b/src/lexer.ts index eadb1fb..1b8d9f6 100644 --- a/src/lexer.ts +++ b/src/lexer.ts @@ -7,6 +7,8 @@ export type DatalessToken = | "then" | "else" | "type" + | "loop" + | "break" | "(" | ")" | "{" @@ -263,6 +265,8 @@ const KEYOWRDS: DatalessToken[] = [ "then", "else", "type", + "loop", + "break", ]; const KEYWORD_SET = new Set(KEYOWRDS); diff --git a/src/lower.ts b/src/lower.ts index 5c5e5a1..228f4d2 100644 --- a/src/lower.ts +++ b/src/lower.ts @@ -4,11 +4,11 @@ import { ExprBlock, FunctionDef, Item, + Resolution, Ty, TyFn, varUnreachable, } from "./ast"; -import { CompilerError } from "./error"; import { encodeUtf8 } from "./utils"; import * as wasm from "./wasm/defs"; @@ -19,30 +19,46 @@ const USIZE: wasm.ValType = "i32"; const POINTER: wasm.ValType = USIZE; const STRING_TYPES: wasm.ValType[] = [POINTER, USIZE]; -const STRING_ABI: ArgAbi & RetAbi = { +const STRING_ABI: ArgRetAbi = { kind: "aggregate", types: STRING_TYPES, }; const WASM_PAGE = 65536; +type Relocation = { + kind: "funccall"; + instr: wasm.Instr & { func: wasm.FuncIdx }; +} & { res: Resolution }; + +function setMap(map: Map, V>, key: K, value: V) { + map.set(JSON.stringify(key), value); +} + +function getMap( + map: Map, V>, + key: K +): V | undefined { + return map.get(JSON.stringify(key)); +} + export type Context = { mod: wasm.Module; funcTypes: Map, wasm.TypeIdx>; reservedHeapMemoryStart: number; - funcIndices: Map; + funcIndices: Map, wasm.FuncIdx>; ast: Ast; + relocations: Relocation[]; }; function internFuncType(cx: Context, type: wasm.FuncType): wasm.TypeIdx { - const s = JSON.stringify(type); - const existing = cx.funcTypes.get(s); + const existing = getMap(cx.funcTypes, type); if (existing !== undefined) { return existing; } const idx = cx.mod.types.length; cx.mod.types.push(type); - cx.funcTypes.set(s, idx); + setMap(cx.funcTypes, type, idx); return idx; } @@ -102,6 +118,7 @@ export function lower(ast: Ast): wasm.Module { funcIndices: new Map(), reservedHeapMemoryStart: 0, ast, + relocations: [], }; ast.items.forEach((item) => { @@ -120,6 +137,23 @@ export function lower(ast: Ast): wasm.Module { addRt(cx, ast); + // THE LINKER + const offset = cx.mod.imports.length; + cx.relocations.forEach((rel) => { + switch (rel.kind) { + case "funccall": { + const idx = getMap(cx.funcIndices, rel.res); + if (idx === undefined) { + throw new Error( + `no function found for relocation '${JSON.stringify(rel.res)}'` + ); + } + rel.instr.func = offset + idx; + } + } + }); + // END OF THE LINKER + return mod; } @@ -131,13 +165,9 @@ type FuncContext = { varLocations: VarLocation[]; }; -type Abi = { params: ArgAbi[]; ret: RetAbi }; +type FnAbi = { params: ArgRetAbi[]; ret: ArgRetAbi }; -type ArgAbi = - | { kind: "scalar"; type: wasm.ValType } - | { kind: "zst" } - | { kind: "aggregate"; types: wasm.ValType[] }; -type RetAbi = +type ArgRetAbi = | { kind: "scalar"; type: wasm.ValType } | { kind: "zst" } | { kind: "aggregate"; types: wasm.ValType[] }; @@ -168,7 +198,11 @@ function lowerFunc(cx: Context, item: Item, func: FunctionDef) { const idx = fcx.cx.mod.funcs.length; fcx.cx.mod.funcs.push(wasmFunc); - fcx.cx.funcIndices.set(fcx.item.id, idx); + setMap( + fcx.cx.funcIndices, + { kind: "item", index: fcx.item.id }, + idx + ); } /* @@ -372,18 +406,17 @@ function lowerExpr(fcx: FuncContext, instrs: wasm.Instr[], expr: Expr) { if (expr.lhs.kind !== "ident") { todo("non constant calls"); } - if (expr.lhs.value.res!.kind !== "builtin") { - todo("youre not a builtin, fuck you"); - } - const printIdx = - fcx.cx.ast.items.filter((item) => item.kind === "function").length + - /*import*/ 1 + - /*_start*/ 1; + const callInstr: wasm.Instr = { kind: "call", func: 9999999999 }; + fcx.cx.relocations.push({ + kind: "funccall", + instr: callInstr, + res: expr.lhs.value.res!, + }); + expr.args.forEach((arg) => { lowerExpr(fcx, instrs, arg); }); - instrs.push({ kind: "call", func: printIdx }); - + instrs.push(callInstr); break; } case "if": { @@ -407,6 +440,43 @@ function lowerExpr(fcx: FuncContext, instrs: wasm.Instr[], expr: Expr) { break; } + case "loop": { + const outerBlockInstrs: wasm.Instr[] = []; + + const bodyInstrs: wasm.Instr[] = []; + lowerExpr(fcx, bodyInstrs, expr.body); + bodyInstrs.push({ + kind: "br", + label: /*innermost control structure, the loop*/ 0, + }); + + outerBlockInstrs.push({ + kind: "loop", + instrs: bodyInstrs, + type: blockTypeForBody(fcx.cx, expr.ty!), + }); + + instrs.push({ + kind: "block", + instrs: outerBlockInstrs, + type: blockTypeForBody(fcx.cx, expr.ty!), + }); + + break; + } + case "break": { + instrs.push({ + kind: "br", + label: expr.target! + /* the block outside the loop */ 1, + }); + break; + } + case "structLiteral": { + todo("struct literal"); + } + default: { + const _: never = expr; + } } } @@ -444,12 +514,12 @@ function loadVariable(instrs: wasm.Instr[], loc: VarLocation) { } } -function computeAbi(ty: TyFn): Abi { - const scalar = (type: wasm.ValType): ArgAbi & RetAbi => +function computeAbi(ty: TyFn): FnAbi { + const scalar = (type: wasm.ValType): ArgRetAbi => ({ kind: "scalar", type } as const); - const zst: ArgAbi & RetAbi = { kind: "zst" }; + const zst: ArgRetAbi = { kind: "zst" }; - function paramAbi(param: Ty): ArgAbi { + function paramAbi(param: Ty): ArgRetAbi { switch (param.kind) { case "string": return STRING_ABI; @@ -470,6 +540,8 @@ function computeAbi(ty: TyFn): Abi { todo("complex tuple abi"); case "struct": todo("struct ABI"); + case "never": + return zst; case "var": varUnreachable(); } @@ -477,7 +549,7 @@ function computeAbi(ty: TyFn): Abi { const params = ty.params.map(paramAbi); - let ret: RetAbi; + let ret: ArgRetAbi; switch (ty.returnTy.kind) { case "string": ret = STRING_ABI; @@ -503,6 +575,9 @@ function computeAbi(ty: TyFn): Abi { todo("complex tuple abi"); case "struct": todo("struct ABI"); + case "never": + ret = zst; + break; case "var": varUnreachable(); } @@ -510,7 +585,7 @@ function computeAbi(ty: TyFn): Abi { return { params, ret }; } -function wasmTypeForAbi(abi: Abi): { +function wasmTypeForAbi(abi: FnAbi): { type: wasm.FuncType; paramLocations: VarLocation[]; } { @@ -568,6 +643,8 @@ function wasmTypeForBody(ty: Ty): wasm.ValType[] { todo("fn types"); case "struct": todo("struct types"); + case "never": + return []; case "var": varUnreachable(); } @@ -588,20 +665,26 @@ function todo(msg: string): never { // Make the program runnable using wasi-preview-1 function addRt(cx: Context, ast: Ast) { const { mod } = cx; - const main = cx.funcIndices.get(ast.typeckResults!.main); - if (main === undefined) { - throw new Error(`main function (${main}) was not compiled.`); - } + + const mainCall: wasm.Instr = { kind: "call", func: 9999999 }; + cx.relocations.push({ + kind: "funccall", + instr: mainCall, + res: ast.typeckResults!.main, + }); const start: wasm.Func = { _name: "_start", type: internFuncType(cx, { params: [], returns: [] }), locals: [], - body: [{ kind: "call", func: main + 1 }], + body: [mainCall], }; const startIdx = mod.funcs.length; mod.funcs.push(start); + console.log(mod.funcs.map(({ _name }) => _name)); + + console.log(startIdx); const reserveMemory = (amount: number) => { const start = cx.reservedHeapMemoryStart; @@ -645,8 +728,16 @@ function addRt(cx: Context, ast: Ast) { { kind: "drop" }, ], }; - + const printIdx = cx.mod.funcs.length; cx.mod.funcs.push(print); - mod.exports.push({ name: "_start", desc: { kind: "func", idx: startIdx } }); + cx.funcIndices.set( + JSON.stringify({ kind: "builtin", name: "print" }), + printIdx + ); + + mod.exports.push({ + name: "_start", + desc: { kind: "func", idx: startIdx + mod.imports.length }, + }); } diff --git a/src/parser.ts b/src/parser.ts index 66a98a3..3c6b91d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -130,7 +130,7 @@ function parseItem(t: Token[]): [Token[], Item] { return [t, { kind: "type", node: def, span: name.span, id: 0 }]; } else { - unexpectedToken(tok); + unexpectedToken(tok, "item"); } } @@ -140,6 +140,8 @@ function parseExpr(t: Token[]): [Token[], Expr] { LET = "let" NAME { ":" TYPE } "=" EXPR "in" EXPR IF = "if" EXPR "then" EXPR { "else" EXPR } + LOOP = "loop" EXPR + BREAK = "break" // The precende here is pretty arbitrary since we forbid mixing of operators // with different precedence classes anyways. @@ -154,7 +156,7 @@ function parseExpr(t: Token[]): [Token[], Expr] { CALL = ATOM { "(" EXPR_LIST ")" } - ATOM = "(" { EXPR ";" } EXPR ")" | IDENT { STRUCT_INIT } | LITERAL | EMPTY | LET | IF + ATOM = "(" { EXPR ";" } EXPR ")" | IDENT { STRUCT_INIT } | LITERAL | EMPTY | LET | IF | LOOP | BREAK EMPTY = STRUCT_INIT = "{" { NAME ":" EXPR } { "," NAME ":" EXPR } { "," } "}" EXPR_LIST = { EXPR { "," EXPR } { "," } } @@ -338,6 +340,7 @@ function parseExprAtom(startT: Token[]): [Token[], Expr] { if (tok.kind === "if") { let cond; [t, cond] = parseExpr(t); + [t] = expectNext(t, "then"); let then; [t, then] = parseExpr(t); @@ -352,6 +355,16 @@ function parseExprAtom(startT: Token[]): [Token[], Expr] { return [t, { kind: "if", cond, then, else: elsePart, span: tok.span }]; } + if (tok.kind === "loop") { + let body; + [t, body] = parseExpr(t); + return [t, { kind: "loop", body, span: tok.span }]; + } + + if (tok.kind === "break") { + return [t, { kind: "break", span: tok.span }]; + } + // Parse nothing at all. return [startT, { kind: "empty", span }]; } @@ -379,6 +392,9 @@ function parseType(t: Token[]): [Token[], Type] { const span = tok.span; switch (tok.kind) { + case "!": { + return [t, { kind: "never", span }]; + } case "identifier": { return [ t, @@ -450,7 +466,7 @@ function parseCommaSeparatedList( // No comma? Fine, you don't like trailing commas. // But this better be the end. if (next(t)[1]?.kind !== terminator) { - unexpectedToken(next(t)[1]); + unexpectedToken(next(t)[1], `, or ${terminator}`); } break; } @@ -501,8 +517,8 @@ function maybeNextT(t: Token[]): [Token[], Token | undefined] { return [rest, next]; } -function unexpectedToken(token: Token): never { - throw new CompilerError("unexpected token", token.span); +function unexpectedToken(token: Token, expected: string): never { + throw new CompilerError(`unexpected token, expected ${expected}`, token.span); } function validateAst(ast: Ast) { diff --git a/src/printer.ts b/src/printer.ts index 90c6ebd..4b28fb2 100644 --- a/src/printer.ts +++ b/src/printer.ts @@ -126,6 +126,13 @@ function printExpr(expr: Expr, indent: number): string { indent + 1 )}${elsePart}`; } + case "loop": { + return `loop ${printExpr(expr.body, indent + 1)}`; + } + case "break": { + const target = expr.target !== undefined ? `#${expr.target}` : ""; + return `break${target}`; + } case "structLiteral": { return `${printIdent(expr.name)} { ${expr.fields .map(([name, expr]) => `${name.name}: ${printExpr(expr, indent + 1)}`) @@ -142,6 +149,8 @@ function printType(type: Type): string { return `[${printType(type.elem)}]`; case "tuple": return `(${type.elems.map(printType).join(", ")})`; + case "never": + return "!"; } } @@ -188,6 +197,9 @@ export function printTy(ty: Ty): string { case "struct": { return ty.name; } + case "never": { + return "!"; + } } } diff --git a/src/typeck.ts b/src/typeck.ts index 944e9ea..e0eebf1 100644 --- a/src/typeck.ts +++ b/src/typeck.ts @@ -16,6 +16,7 @@ import { Ty, TY_BOOL, TY_INT, + TY_NEVER, TY_STRING, TY_UNIT, TyFn, @@ -78,6 +79,9 @@ function lowerAstTyBase( ), }; } + case "never": { + return TY_NEVER; + } } } @@ -234,7 +238,7 @@ export function typeck(ast: Ast): Ast { } typecked.typeckResults = { - main: main.id, + main: { kind: "item", index: main.id }, }; return typecked; @@ -337,7 +341,10 @@ export class InferContext { this.constrainVar(rhs.index, lhs); return; } - // type variable handling here + + if (rhs.kind === "never") { + return; + } switch (lhs.kind) { case "string": { @@ -397,6 +404,8 @@ export function checkBody( typeOfItem: (index: number) => Ty ): Expr { const localTys = [...fnTy.params]; + const loopState: { hasBreak: boolean; nestingDepth: number }[] = []; + let currentNestingDepth = 0; const infcx = new InferContext(); @@ -460,6 +469,7 @@ export function checkBody( }; } case "block": { + currentNestingDepth++; const prevLocalTysLen = localTys.length; const exprs = expr.exprs.map((expr) => this.expr(expr)); @@ -468,6 +478,8 @@ export function checkBody( localTys.length = prevLocalTysLen; + currentNestingDepth--; + return { ...expr, exprs, @@ -544,8 +556,10 @@ export function checkBody( } case "if": { const cond = this.expr(expr.cond); + currentNestingDepth++; const then = this.expr(expr.then); const elsePart = expr.else && this.expr(expr.else); + currentNestingDepth--; infcx.assign(TY_BOOL, cond.ty!, cond.span); @@ -560,6 +574,42 @@ export function checkBody( return { ...expr, cond, then, else: elsePart, ty }; } + case "loop": { + currentNestingDepth++; + loopState.push({ + hasBreak: false, + nestingDepth: currentNestingDepth, + }); + + const body = this.expr(expr.body); + infcx.assign(TY_UNIT, body.ty!, body.span); + + const hadBreak = loopState.pop(); + const ty = hadBreak ? TY_UNIT : TY_NEVER; + + currentNestingDepth--; + + return { + ...expr, + body, + ty, + }; + } + case "break": { + if (loopState.length === 0) { + throw new CompilerError("break outside loop", expr.span); + } + const loopDepth = loopState[loopState.length - 1].nestingDepth; + loopState[loopState.length - 1].hasBreak = true; + + const target = currentNestingDepth - loopDepth; + + return { + ...expr, + ty: TY_NEVER, + target, + }; + } case "structLiteral": { const fields = expr.fields.map<[Identifier, Expr]>(([name, expr]) => [ name,