From 7eeaf548d05ea55b7469cb344777d0e33ab81aba Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:01:52 +0200 Subject: [PATCH] refactor file loading --- src/ast.ts | 4 +- src/context.ts | 65 +++++++++++++++++++++- src/error.ts | 59 +++++++++++--------- src/index.ts | 147 +++++-------------------------------------------- src/lexer.ts | 9 +-- src/loader.ts | 77 ++++++++++++++++++++++++++ src/parser.ts | 66 ++++++++++++---------- src/resolve.ts | 1 + src/typeck.ts | 1 + 9 files changed, 236 insertions(+), 193 deletions(-) create mode 100644 src/loader.ts diff --git a/src/ast.ts b/src/ast.ts index 8be7bf9..502fb55 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,4 +1,4 @@ -import { DUMMY_SPAN, Span } from "./error"; +import { DUMMY_SPAN, LoadedFile, Span } from "./error"; import { LitIntType } from "./lexer"; import { ComplexMap, unwrap } from "./utils"; @@ -57,6 +57,7 @@ export type Crate

= { rootItems: Item

[]; itemsById: ComplexMap>; packageName: string; + rootFile: LoadedFile, } & P["typeckResults"]; export type DepCrate = Crate; @@ -615,6 +616,7 @@ export function foldAst( itemsById: folder.newItemsById, typeckResults: "typeckResults" in ast ? ast.typeckResults : undefined, packageName: ast.packageName, + rootFile: ast.rootFile, }; } diff --git a/src/context.ts b/src/context.ts index 68c3551..d418368 100644 --- a/src/context.ts +++ b/src/context.ts @@ -1,6 +1,8 @@ import { Crate, DepCrate, Final, Item, ItemId, Phase } from "./ast"; import { DUMMY_SPAN, Span } from "./error"; import { Ids, unwrap } from "./utils"; +import fs from "fs"; +import path from "path"; export type CrateLoader = ( gcx: GlobalContext, @@ -21,7 +23,7 @@ export class GlobalContext { public depCrates: Crate[] = []; public crateId: Ids = new Ids(); - constructor(public crateLoader: CrateLoader) {} + constructor(public opts: Options, public crateLoader: CrateLoader) {} public findItem

( id: ItemId, @@ -47,3 +49,64 @@ export class GlobalContext { return unwrap(crate.itemsById.get(id)); } } + +export type Options = { + input: string; + filename: string; + packageName: string; + debug: Set; + noOutput: boolean; +}; + +export function parseArgs(hardcodedInput: string): Options { + let filename: string; + let input: string; + let packageName: string; + let debug = new Set(); + let noOutput = false; + + if (process.argv.length > 2) { + filename = process.argv[2]; + if (path.extname(filename) !== ".nil") { + console.error(process.argv); + + console.error( + `error: filename must have \`.nil\` extension: \`${filename}\`` + ); + process.exit(1); + } + + input = fs.readFileSync(filename, { encoding: "utf-8" }); + packageName = path.basename(filename, ".nil"); + + const debugArg = process.argv.find((arg) => arg.startsWith("--debug=")); + if (debugArg !== undefined) { + const debugs = debugArg.slice("--debug=".length); + debug = new Set(debugs.split(",")); + } + + if (process.argv.some((arg) => arg === "--no-output")) { + noOutput = true; + } + } else { + filename = ""; + input = hardcodedInput; + packageName = "test"; + debug = new Set([ + "tokens", + "parsed", + "resolved", + "typecked", + "wat", + "wasm-validate", + ]); + } + + return { + filename, + input, + packageName, + debug, + noOutput, + }; +} diff --git a/src/error.ts b/src/error.ts index 46ac4ae..d9797b6 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1,20 +1,32 @@ +export type LoadedFile = { + path?: string; + content: string; +}; + export type Span = { start: number; end: number; + file: LoadedFile; }; export function spanMerge(a: Span, b: Span): Span { + if (a.file !== b.file) { + throw new Error("cannot merge spans from different files"); + } + return { start: Math.min(a.start, b.start), end: Math.max(a.end, b.end), + file: a.file, }; } -export const DUMMY_SPAN = { start: 0, end: 0 }; -export const EOF_SPAN = { +export const DUMMY_SPAN: Span = { start: 0, end: 0, file: { content: "" } }; +export const eofSpan = (file: LoadedFile): Span => ({ start: Number.MAX_SAFE_INTEGER, end: Number.MAX_SAFE_INTEGER, -}; + file, +}); export class CompilerError extends Error { msg: string; @@ -28,8 +40,6 @@ export class CompilerError extends Error { } export function withErrorPrinter( - input: string, - filename: string, f: () => R, afterError: (e: CompilerError) => R ): R { @@ -37,7 +47,7 @@ export function withErrorPrinter( return f(); } catch (e) { if (e instanceof CompilerError) { - renderError(input, filename, e); + renderError(e); return afterError(e); } else { throw e; @@ -45,30 +55,33 @@ export function withErrorPrinter( } } -function renderError(input: string, filename: string, e: CompilerError) { - const lineSpans = lines(input); +function renderError(e: CompilerError) { + const { span } = e; + const { content } = span.file; + + const lineSpans = lines(span.file); const line = - e.span.start === Number.MAX_SAFE_INTEGER + span.start === Number.MAX_SAFE_INTEGER ? lineSpans[lineSpans.length - 1] : lineSpans.find( - (line) => line.start <= e.span.start && line.end >= e.span.start + (line) => line.start <= span.start && line.end >= span.start ); if (!line) { - throw Error(`Span out of bounds: ${e.span.start}..${e.span.end}`); + throw Error(`Span out of bounds: ${span.start}..${span.end}`); } const lineIdx = lineSpans.indexOf(line); const lineNo = lineIdx + 1; console.error(`error: ${e.message}`); - console.error(` --> ${filename}:${lineNo}`); + console.error(` --> ${span.file.path ?? ""}:${lineNo}`); - console.error(`${lineNo} | ${spanToSnippet(input, line)}`); + console.error(`${lineNo} | ${spanToSnippet(content, line)}`); const startRelLine = - e.span.start === Number.MAX_SAFE_INTEGER ? 0 : e.span.start - line.start; + span.start === Number.MAX_SAFE_INTEGER ? 0 : span.start - line.start; const spanLength = - e.span.start === Number.MAX_SAFE_INTEGER + span.start === Number.MAX_SAFE_INTEGER ? 1 - : min(e.span.end, line.end) - e.span.start; + : min(span.end, line.end) - span.start; console.error( `${" ".repeat(String(lineNo).length)} ${" ".repeat( @@ -84,12 +97,12 @@ function spanToSnippet(input: string, span: Span): string { return input.slice(span.start, span.end); } -export function lines(input: string): Span[] { - const lines: Span[] = [{ start: 0, end: 0 }]; +export function lines(file: LoadedFile): Span[] { + const lines: Span[] = [{ start: 0, end: 0, file }]; - for (let i = 0; i < input.length; i++) { - if (input[i] === "\n") { - lines.push({ start: i + 1, end: i + 1 }); + for (let i = 0; i < file.content.length; i++) { + if (file.content[i] === "\n") { + lines.push({ start: i + 1, end: i + 1, file }); } else { lines[lines.length - 1].end++; } @@ -98,10 +111,6 @@ export function lines(input: string): Span[] { return lines; } -export function todo(msg: string): never { - throw new CompilerError(`TODO: ${msg}`, { start: 0, end: 0 }); -} - function min(a: number, b: number): number { return a < b ? a : b; } diff --git a/src/index.ts b/src/index.ts index c703435..605897b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,16 +1,16 @@ -import { CompilerError, Span, withErrorPrinter } from "./error"; +import { LoadedFile, withErrorPrinter } from "./error"; import { isValidIdent, tokenize } from "./lexer"; import { lower as lowerToWasm } from "./lower"; -import { parse } from "./parser"; +import { ParseState, parse } from "./parser"; import { printAst } from "./printer"; import { resolve } from "./resolve"; import { typeck } from "./typeck"; import { writeModuleWatToString } from "./wasm/wat"; import fs from "fs"; -import path from "path"; import { exec } from "child_process"; -import { Crate, Built, Typecked, DepCrate } from "./ast"; -import { GlobalContext, CrateLoader } from "./context"; +import { Crate, Built, Typecked } from "./ast"; +import { GlobalContext, parseArgs } from "./context"; +import { loadCrate } from "./loader"; const INPUT = ` extern mod std; @@ -33,70 +33,9 @@ function linkStd() = ( ); `; -type Config = { - input: string; - filename: string; - packageName: string; - debug: Set; - noOutput: boolean; -}; - -function parseArgs(): Config { - let filename: string; - let input: string; - let packageName: string; - let debug = new Set(); - let noOutput = false; - - if (process.argv.length > 2) { - filename = process.argv[2]; - if (path.extname(filename) !== ".nil") { - console.error(process.argv); - - console.error( - `error: filename must have \`.nil\` extension: \`${filename}\`` - ); - process.exit(1); - } - - input = fs.readFileSync(filename, { encoding: "utf-8" }); - packageName = path.basename(filename, ".nil"); - - const debugArg = process.argv.find((arg) => arg.startsWith("--debug=")); - if (debugArg !== undefined) { - const debugs = debugArg.slice("--debug=".length); - debug = new Set(debugs.split(",")); - } - - if (process.argv.some((arg) => arg === "--no-output")) { - noOutput = true; - } - } else { - filename = ""; - input = INPUT; - packageName = "test"; - debug = new Set([ - "tokens", - "parsed", - "resolved", - "typecked", - "wat", - "wasm-validate", - ]); - } - - return { - filename, - input, - packageName, - debug, - noOutput, - }; -} - function main() { - const config = parseArgs(); - const { filename, packageName, input, debug } = config; + const opts = parseArgs(INPUT); + const { filename, packageName, input, debug } = opts; if (!isValidIdent(packageName)) { console.error( @@ -105,22 +44,24 @@ function main() { process.exit(1); } - const gcx = new GlobalContext(loadCrate); + const file: LoadedFile = { path: filename, content: input }; + + const gcx = new GlobalContext(opts, loadCrate); const mainCrate = gcx.crateId.next(); withErrorPrinter( - input, - filename, () => { const start = Date.now(); - const tokens = tokenize(input); + const tokens = tokenize(file); if (debug.has("tokens")) { console.log("-----TOKENS------------"); console.log(tokens); } - const ast: Crate = parse(packageName, tokens, mainCrate); + const parseState: ParseState = { tokens, file }; + + const ast: Crate = parse(packageName, parseState, mainCrate); if (debug.has("ast")) { console.log("-----AST---------------"); @@ -160,7 +101,7 @@ function main() { console.log(moduleStringColor); } - if (!config.noOutput) { + if (!opts.noOutput) { fs.writeFileSync("out.wat", moduleString); } @@ -189,62 +130,4 @@ function main() { ); } -const loadCrate: CrateLoader = ( - gcx: GlobalContext, - name: string, - span: Span -): DepCrate => { - // We really, really want a good algorithm for finding crates. - // But right now we just look for files in the CWD. - - const existing = gcx.depCrates.find((crate) => crate.packageName === name); - if (existing) { - return existing; - } - - const options = [`${name}.nil`, `${name}/${name}.mod.nil`]; - - let input: string | undefined = undefined; - let filename: string | undefined = undefined; - options.forEach((tryName) => { - try { - input = fs.readFileSync(tryName, { encoding: "utf-8" }); - filename = tryName; - } catch (e) {} - }); - - if (input === undefined || filename === undefined) { - throw new CompilerError( - `failed to load ${name}, could not find ${options.join(" or ")}`, - span - ); - } - - const inputString: string = input; - - return withErrorPrinter( - inputString, - filename, - (): DepCrate => { - const crateId = gcx.crateId.next(); - - const tokens = tokenize(inputString); - const ast = parse(name, tokens, crateId); - const resolved = resolve(gcx, ast); - console.log(resolved); - - const typecked = typeck(gcx, resolved); - - gcx.depCrates.push(typecked); - return typecked; - }, - () => { - throw new CompilerError( - `failed to load crate ${name}: crate contains errors`, - span - ); - } - ); -}; - main(); diff --git a/src/lexer.ts b/src/lexer.ts index aa08105..5586092 100644 --- a/src/lexer.ts +++ b/src/lexer.ts @@ -1,4 +1,4 @@ -import { CompilerError, Span } from "./error"; +import { CompilerError, LoadedFile, Span } from "./error"; export type DatalessToken = | "function" @@ -85,13 +85,14 @@ const SINGLE_PUNCT: string[] = [ "%", ]; -export function tokenize(input: string): Token[] { +export function tokenize(file: LoadedFile): Token[] { + const { content: input } = file; const tokens: Token[] = []; let i = 0; finish: while (i < input.length) { const next = input[i]; - const span: Span = { start: i, end: i + 1 }; + const span: Span = { start: i, end: i + 1, file }; if (next === "/" && input[i + 1] === "/") { while (input[i] !== "\n") { @@ -205,7 +206,7 @@ export function tokenize(input: string): Token[] { default: throw new CompilerError( `invalid escape character: ${input[i]}`, - { start: span.end - 1, end: span.end } + { start: span.end - 1, end: span.end, file } ); } continue; diff --git a/src/loader.ts b/src/loader.ts new file mode 100644 index 0000000..526382c --- /dev/null +++ b/src/loader.ts @@ -0,0 +1,77 @@ +import { DepCrate } from "./ast"; +import { CrateLoader, GlobalContext } from "./context"; +import { CompilerError, LoadedFile, Span, withErrorPrinter } from "./error"; +import fs from "fs"; +import path from "path"; +import { tokenize } from "./lexer"; +import { ParseState, parse } from "./parser"; +import { resolve } from "./resolve"; +import { typeck } from "./typeck"; + +export function loadModuleFile( + relativeTo: string, + moduleName: string, + span: Span +): LoadedFile { + const options = [ + path.join(relativeTo, `${moduleName}.nil`), + path.join(relativeTo, moduleName, `${moduleName}.mod.nil`), + ]; + + let content: string | undefined = undefined; + let filePath: string | undefined = undefined; + options.forEach((tryPath) => { + try { + content = fs.readFileSync(tryPath, { encoding: "utf-8" }); + filePath = tryPath; + } catch (e) {} + }); + + if (content === undefined || filePath === undefined) { + throw new CompilerError( + `failed to load ${moduleName}, could not find ${options.join(" or ")}`, + span + ); + } + + return { content, path: filePath }; +} + +export const loadCrate: CrateLoader = ( + gcx: GlobalContext, + name: string, + span: Span +): DepCrate => { + // We really, really want a good algorithm for finding crates. + // But right now we just look for files in the CWD. + + const existing = gcx.depCrates.find((crate) => crate.packageName === name); + if (existing) { + return existing; + } + + const file = loadModuleFile(".", name, span); + + return withErrorPrinter( + (): DepCrate => { + const crateId = gcx.crateId.next(); + + const tokens = tokenize(file); + const parseState: ParseState = { tokens, file }; + const ast = parse(name, parseState, crateId); + const resolved = resolve(gcx, ast); + console.log(resolved); + + const typecked = typeck(gcx, resolved); + + gcx.depCrates.push(typecked); + return typecked; + }, + () => { + throw new CompilerError( + `failed to load crate ${name}: crate contains errors`, + span + ); + } + ); +}; diff --git a/src/parser.ts b/src/parser.ts index fc977f7..8897031 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -32,33 +32,36 @@ import { GlobalItem, StructLiteralField, } from "./ast"; -import { CompilerError, EOF_SPAN, Span, spanMerge } from "./error"; +import { CompilerError, eofSpan, LoadedFile, Span, spanMerge } from "./error"; import { BaseToken, Token, TokenIdent, TokenLitString } from "./lexer"; import { ComplexMap, ComplexSet, Ids } from "./utils"; -type Parser = (t: Token[]) => [Token[], T]; +export type ParseState = { tokens: Token[]; file: LoadedFile }; +type State = ParseState; + +type Parser = (t: State) => [State, T]; export function parse( packageName: string, - t: Token[], + t: State, crateId: number ): Crate { const items: Item[] = []; - while (t.length > 0) { + while (t.tokens.length > 0) { let item; [t, item] = parseItem(t); items.push(item); } - const ast: Crate = buildCrate(packageName, items, crateId); + const ast: Crate = buildCrate(packageName, items, crateId, t.file); validateAst(ast); return ast; } -function parseItem(t: Token[]): [Token[], Item] { +function parseItem(t: State): [State, Item] { let tok; [t, tok] = next(t); if (tok.kind === "function") { @@ -211,7 +214,7 @@ type FunctionSig = { returnType?: Type; }; -function parseFunctionSig(t: Token[]): [Token[], FunctionSig] { +function parseFunctionSig(t: State): [State, FunctionSig] { let name; [t, name] = expectNext(t, "identifier"); @@ -238,7 +241,7 @@ function parseFunctionSig(t: Token[]): [Token[], FunctionSig] { return [t, { name: name.ident, params, returnType }]; } -function parseExpr(t: Token[]): [Token[], Expr] { +function parseExpr(t: State): [State, Expr] { /* EXPR = ASSIGNMENT @@ -284,7 +287,7 @@ function mkParserExprBinary( kinds: string[], mkExpr = mkBinaryExpr ): Parser> { - function parser(t: Token[]): [Token[], Expr] { + function parser(t: State): [State, Expr] { let lhs; [t, lhs] = lower(t); @@ -328,7 +331,7 @@ const parseExprAssignment = mkParserExprBinary( (lhs, rhs, span) => ({ kind: "assign", lhs, rhs, span }) ); -function parseExprUnary(t: Token[]): [Token[], Expr] { +function parseExprUnary(t: State): [State, Expr] { const peek = peekKind(t); if (peek && UNARY_KINDS.includes(peek as UnaryKind)) { let tok: Token; @@ -348,7 +351,7 @@ function parseExprUnary(t: Token[]): [Token[], Expr] { return parseExprCall(t); } -function parseExprCall(t: Token[]): [Token[], Expr] { +function parseExprCall(t: State): [State, Expr] { let lhs: Expr; [t, lhs] = parseExprAtom(t); @@ -385,7 +388,7 @@ function parseExprCall(t: Token[]): [Token[], Expr] { return [t, lhs]; } -function parseExprAtom(startT: Token[]): [Token[], Expr] { +function parseExprAtom(startT: State): [State, Expr] { // eslint-disable-next-line prefer-const let [t, tok] = next(startT); const span = tok.span; @@ -536,8 +539,8 @@ function parseExprAtom(startT: Token[]): [Token[], Expr] { } function parseStructInit( - t: Token[] -): [Token[], ExprStructLiteral["fields"]] { + t: State +): [State, ExprStructLiteral["fields"]] { [t] = expectNext(t, "{"); let fields; @@ -558,7 +561,7 @@ function parseStructInit( return [t, fields]; } -function parseType(t: Token[]): [Token[], Type] { +function parseType(t: State): [State, Type] { let tok; [t, tok] = next(t); const span = tok.span; @@ -619,10 +622,10 @@ function parseType(t: Token[]): [Token[], Type] { // helpers function parseCommaSeparatedList( - t: Token[], + t: State, terminator: Token["kind"], parser: Parser -): [Token[], R[]] { +): [State, R[]] { const items: R[] = []; // () | (a) | (a,) | (a, b) @@ -651,29 +654,29 @@ function parseCommaSeparatedList( } function eat( - t: Token[], + t: State, kind: T["kind"] -): [Token[], T | undefined] { +): [State, T | undefined] { if (peekKind(t) === kind) { return expectNext(t, kind); } return [t, undefined]; } -function peekKind(t: Token[]): Token["kind"] | undefined { +function peekKind(t: State): Token["kind"] | undefined { return maybeNextT(t)?.[1]?.kind; } function expectNext( - t: Token[], + t: State, kind: T["kind"] -): [Token[], T & Token] { +): [State, T & Token] { let tok; [t, tok] = maybeNextT(t); if (!tok) { throw new CompilerError( `expected \`${kind}\`, found end of file`, - EOF_SPAN + eofSpan(t.file) ); } if (tok.kind !== kind) { @@ -685,18 +688,19 @@ function expectNext( return [t, tok as unknown as T & Token]; } -function next(t: Token[]): [Token[], Token] { +function next(t: State): [State, Token] { const [rest, next] = maybeNextT(t); if (!next) { - throw new CompilerError("unexpected end of file", EOF_SPAN); + throw new CompilerError("unexpected end of file", eofSpan(t.file)); } return [rest, next]; } -function maybeNextT(t: Token[]): [Token[], Token | undefined] { - const next = t[0]; - const rest = t.slice(1); - return [rest, next]; +function maybeNextT(t: State): [State, Token | undefined] { + const next = t.tokens[0]; + const rest = t.tokens.slice(1); + + return [{ ...t, tokens: rest }, next]; } function unexpectedToken(token: Token, expected: string): never { @@ -769,7 +773,8 @@ function validateAst(ast: Crate) { function buildCrate( packageName: string, rootItems: Item[], - crateId: number + crateId: number, + rootFile: LoadedFile ): Crate { const itemId = new Ids(); itemId.next(); // crate root ID @@ -780,6 +785,7 @@ function buildCrate( rootItems, itemsById: new ComplexMap(), packageName, + rootFile, }; const assigner: Folder = { diff --git a/src/resolve.ts b/src/resolve.ts index cc3b283..5ae1547 100644 --- a/src/resolve.ts +++ b/src/resolve.ts @@ -74,6 +74,7 @@ export function resolve( itemsById: cx.newItemsById, rootItems, packageName: ast.packageName, + rootFile: ast.rootFile, }; } diff --git a/src/typeck.ts b/src/typeck.ts index c6b3c0f..9a1999a 100644 --- a/src/typeck.ts +++ b/src/typeck.ts @@ -423,6 +423,7 @@ export function typeck( throw new CompilerError(`\`main\` function not found`, { start: 0, end: 1, + file: ast.rootFile, }); }