refactor file loading

This commit is contained in:
nora 2023-08-02 15:01:52 +02:00
parent beb0321382
commit 7eeaf548d0
9 changed files with 236 additions and 193 deletions

View file

@ -1,4 +1,4 @@
import { DUMMY_SPAN, Span } from "./error";
import { DUMMY_SPAN, LoadedFile, Span } from "./error";
import { LitIntType } from "./lexer";
import { ComplexMap, unwrap } from "./utils";
@ -57,6 +57,7 @@ export type Crate<P extends Phase> = {
rootItems: Item<P>[];
itemsById: ComplexMap<ItemId, Item<P>>;
packageName: string;
rootFile: LoadedFile,
} & P["typeckResults"];
export type DepCrate = Crate<Final>;
@ -615,6 +616,7 @@ export function foldAst<From extends Phase, To extends Phase>(
itemsById: folder.newItemsById,
typeckResults: "typeckResults" in ast ? ast.typeckResults : undefined,
packageName: ast.packageName,
rootFile: ast.rootFile,
};
}

View file

@ -1,6 +1,8 @@
import { Crate, DepCrate, Final, Item, ItemId, Phase } from "./ast";
import { DUMMY_SPAN, Span } from "./error";
import { Ids, unwrap } from "./utils";
import fs from "fs";
import path from "path";
export type CrateLoader = (
gcx: GlobalContext,
@ -21,7 +23,7 @@ export class GlobalContext {
public depCrates: Crate<Final>[] = [];
public crateId: Ids = new Ids();
constructor(public crateLoader: CrateLoader) {}
constructor(public opts: Options, public crateLoader: CrateLoader) {}
public findItem<P extends Phase>(
id: ItemId,
@ -47,3 +49,64 @@ export class GlobalContext {
return unwrap(crate.itemsById.get(id));
}
}
/**
 * Settings for one compiler invocation, as produced by `parseArgs`.
 */
export type Options = {
/** Source text of the root file (file contents, or the hardcoded fallback program). */
input: string;
/** Path of the root file as given on the command line, or `"<hardcoded>"` for the fallback. */
filename: string;
/** Package name: the root file's base name without `.nil`, or `"test"` for the fallback. */
packageName: string;
/** Names of the debug outputs to enable (the comma-separated values of `--debug=`). */
debug: Set<string>;
/** `true` when `--no-output` was passed. */
noOutput: boolean;
};
/**
 * Build the compiler {@link Options} from `process.argv`.
 *
 * With at least one argument, `process.argv[2]` is the root source file: it
 * must carry a `.nil` extension (otherwise an error is printed and the
 * process exits with status 1), its contents become `input`, and its base
 * name becomes `packageName`. `--debug=a,b,c` selects debug outputs and
 * `--no-output` sets `noOutput`.
 *
 * Without arguments, `hardcodedInput` is compiled as package `test` with a
 * fixed set of debug outputs enabled.
 *
 * @param hardcodedInput Source text used when no file is given on the command line.
 * @returns The resolved options.
 */
export function parseArgs(hardcodedInput: string): Options {
  const argv = process.argv;

  // No file argument: compile the built-in program with the default debug set.
  if (argv.length <= 2) {
    return {
      filename: "<hardcoded>",
      input: hardcodedInput,
      packageName: "test",
      debug: new Set([
        "tokens",
        "parsed",
        "resolved",
        "typecked",
        "wat",
        "wasm-validate",
      ]),
      noOutput: false,
    };
  }

  const filename = argv[2];
  if (path.extname(filename) !== ".nil") {
    console.error(argv);
    console.error(
      `error: filename must have \`.nil\` extension: \`${filename}\``
    );
    process.exit(1);
  }

  const input = fs.readFileSync(filename, { encoding: "utf-8" });
  const packageName = path.basename(filename, ".nil");

  // Only the first `--debug=` flag is honoured.
  const debugPrefix = "--debug=";
  const debugFlag = argv.find((arg) => arg.startsWith(debugPrefix));
  const debug =
    debugFlag === undefined
      ? new Set<string>()
      : new Set(debugFlag.slice(debugPrefix.length).split(","));

  const noOutput = argv.includes("--no-output");

  return {
    filename,
    input,
    packageName,
    debug,
    noOutput,
  };
}

View file

@ -1,20 +1,32 @@
/**
 * A source file held in memory, shared by every span that points into it.
 */
export type LoadedFile = {
/** Where the file came from; absent for synthetic files (rendered as `<unknown>` in errors). */
path?: string;
/** The full text of the file; span offsets index into this string. */
content: string;
};
/**
 * A range of source text inside a specific file.
 */
export type Span = {
/** Offset of the first character of the range within `file.content`. */
start: number;
/** Offset just past the last character (the range is sliced as `[start, end)`). */
end: number;
/** The file the offsets refer to. */
file: LoadedFile;
};
/**
 * Compute the smallest span that covers both `a` and `b`.
 *
 * @param a First span.
 * @param b Second span; must point into the same file object as `a`.
 * @returns A span from the smaller start to the larger end, in the shared file.
 * @throws Error if the two spans belong to different files.
 */
export function spanMerge(a: Span, b: Span): Span {
  const { file } = a;

  // Files are compared by identity, not by path or content.
  if (file !== b.file) {
    throw new Error("cannot merge spans from different files");
  }

  const start = Math.min(a.start, b.start);
  const end = Math.max(a.end, b.end);
  return { start, end, file };
}
export const DUMMY_SPAN = { start: 0, end: 0 };
export const EOF_SPAN = {
export const DUMMY_SPAN: Span = { start: 0, end: 0, file: { content: "" } };
export const eofSpan = (file: LoadedFile): Span => ({
start: Number.MAX_SAFE_INTEGER,
end: Number.MAX_SAFE_INTEGER,
};
file,
});
export class CompilerError extends Error {
msg: string;
@ -28,8 +40,6 @@ export class CompilerError extends Error {
}
export function withErrorPrinter<R>(
input: string,
filename: string,
f: () => R,
afterError: (e: CompilerError) => R
): R {
@ -37,7 +47,7 @@ export function withErrorPrinter<R>(
return f();
} catch (e) {
if (e instanceof CompilerError) {
renderError(input, filename, e);
renderError(e);
return afterError(e);
} else {
throw e;
@ -45,30 +55,33 @@ export function withErrorPrinter<R>(
}
}
function renderError(input: string, filename: string, e: CompilerError) {
const lineSpans = lines(input);
function renderError(e: CompilerError) {
const { span } = e;
const { content } = span.file;
const lineSpans = lines(span.file);
const line =
e.span.start === Number.MAX_SAFE_INTEGER
span.start === Number.MAX_SAFE_INTEGER
? lineSpans[lineSpans.length - 1]
: lineSpans.find(
(line) => line.start <= e.span.start && line.end >= e.span.start
(line) => line.start <= span.start && line.end >= span.start
);
if (!line) {
throw Error(`Span out of bounds: ${e.span.start}..${e.span.end}`);
throw Error(`Span out of bounds: ${span.start}..${span.end}`);
}
const lineIdx = lineSpans.indexOf(line);
const lineNo = lineIdx + 1;
console.error(`error: ${e.message}`);
console.error(` --> ${filename}:${lineNo}`);
console.error(` --> ${span.file.path ?? "<unknown>"}:${lineNo}`);
console.error(`${lineNo} | ${spanToSnippet(input, line)}`);
console.error(`${lineNo} | ${spanToSnippet(content, line)}`);
const startRelLine =
e.span.start === Number.MAX_SAFE_INTEGER ? 0 : e.span.start - line.start;
span.start === Number.MAX_SAFE_INTEGER ? 0 : span.start - line.start;
const spanLength =
e.span.start === Number.MAX_SAFE_INTEGER
span.start === Number.MAX_SAFE_INTEGER
? 1
: min(e.span.end, line.end) - e.span.start;
: min(span.end, line.end) - span.start;
console.error(
`${" ".repeat(String(lineNo).length)} ${" ".repeat(
@ -84,12 +97,12 @@ function spanToSnippet(input: string, span: Span): string {
return input.slice(span.start, span.end);
}
export function lines(input: string): Span[] {
const lines: Span[] = [{ start: 0, end: 0 }];
export function lines(file: LoadedFile): Span[] {
const lines: Span[] = [{ start: 0, end: 0, file }];
for (let i = 0; i < input.length; i++) {
if (input[i] === "\n") {
lines.push({ start: i + 1, end: i + 1 });
for (let i = 0; i < file.content.length; i++) {
if (file.content[i] === "\n") {
lines.push({ start: i + 1, end: i + 1, file });
} else {
lines[lines.length - 1].end++;
}
@ -98,10 +111,6 @@ export function lines(input: string): Span[] {
return lines;
}
export function todo(msg: string): never {
throw new CompilerError(`TODO: ${msg}`, { start: 0, end: 0 });
}
/**
 * Return the smaller of two numbers.
 *
 * `a` is returned only when it is strictly less than `b`; in every other
 * case (including ties) `b` is returned.
 */
function min(a: number, b: number): number {
  if (a < b) {
    return a;
  }
  return b;
}

View file

@ -1,16 +1,16 @@
import { CompilerError, Span, withErrorPrinter } from "./error";
import { LoadedFile, withErrorPrinter } from "./error";
import { isValidIdent, tokenize } from "./lexer";
import { lower as lowerToWasm } from "./lower";
import { parse } from "./parser";
import { ParseState, parse } from "./parser";
import { printAst } from "./printer";
import { resolve } from "./resolve";
import { typeck } from "./typeck";
import { writeModuleWatToString } from "./wasm/wat";
import fs from "fs";
import path from "path";
import { exec } from "child_process";
import { Crate, Built, Typecked, DepCrate } from "./ast";
import { GlobalContext, CrateLoader } from "./context";
import { Crate, Built, Typecked } from "./ast";
import { GlobalContext, parseArgs } from "./context";
import { loadCrate } from "./loader";
const INPUT = `
extern mod std;
@ -33,70 +33,9 @@ function linkStd() = (
);
`;
type Config = {
input: string;
filename: string;
packageName: string;
debug: Set<string>;
noOutput: boolean;
};
function parseArgs(): Config {
let filename: string;
let input: string;
let packageName: string;
let debug = new Set<string>();
let noOutput = false;
if (process.argv.length > 2) {
filename = process.argv[2];
if (path.extname(filename) !== ".nil") {
console.error(process.argv);
console.error(
`error: filename must have \`.nil\` extension: \`${filename}\``
);
process.exit(1);
}
input = fs.readFileSync(filename, { encoding: "utf-8" });
packageName = path.basename(filename, ".nil");
const debugArg = process.argv.find((arg) => arg.startsWith("--debug="));
if (debugArg !== undefined) {
const debugs = debugArg.slice("--debug=".length);
debug = new Set(debugs.split(","));
}
if (process.argv.some((arg) => arg === "--no-output")) {
noOutput = true;
}
} else {
filename = "<hardcoded>";
input = INPUT;
packageName = "test";
debug = new Set([
"tokens",
"parsed",
"resolved",
"typecked",
"wat",
"wasm-validate",
]);
}
return {
filename,
input,
packageName,
debug,
noOutput,
};
}
function main() {
const config = parseArgs();
const { filename, packageName, input, debug } = config;
const opts = parseArgs(INPUT);
const { filename, packageName, input, debug } = opts;
if (!isValidIdent(packageName)) {
console.error(
@ -105,22 +44,24 @@ function main() {
process.exit(1);
}
const gcx = new GlobalContext(loadCrate);
const file: LoadedFile = { path: filename, content: input };
const gcx = new GlobalContext(opts, loadCrate);
const mainCrate = gcx.crateId.next();
withErrorPrinter(
input,
filename,
() => {
const start = Date.now();
const tokens = tokenize(input);
const tokens = tokenize(file);
if (debug.has("tokens")) {
console.log("-----TOKENS------------");
console.log(tokens);
}
const ast: Crate<Built> = parse(packageName, tokens, mainCrate);
const parseState: ParseState = { tokens, file };
const ast: Crate<Built> = parse(packageName, parseState, mainCrate);
if (debug.has("ast")) {
console.log("-----AST---------------");
@ -160,7 +101,7 @@ function main() {
console.log(moduleStringColor);
}
if (!config.noOutput) {
if (!opts.noOutput) {
fs.writeFileSync("out.wat", moduleString);
}
@ -189,62 +130,4 @@ function main() {
);
}
const loadCrate: CrateLoader = (
gcx: GlobalContext,
name: string,
span: Span
): DepCrate => {
// We really, really want a good algorithm for finding crates.
// But right now we just look for files in the CWD.
const existing = gcx.depCrates.find((crate) => crate.packageName === name);
if (existing) {
return existing;
}
const options = [`${name}.nil`, `${name}/${name}.mod.nil`];
let input: string | undefined = undefined;
let filename: string | undefined = undefined;
options.forEach((tryName) => {
try {
input = fs.readFileSync(tryName, { encoding: "utf-8" });
filename = tryName;
} catch (e) {}
});
if (input === undefined || filename === undefined) {
throw new CompilerError(
`failed to load ${name}, could not find ${options.join(" or ")}`,
span
);
}
const inputString: string = input;
return withErrorPrinter(
inputString,
filename,
(): DepCrate => {
const crateId = gcx.crateId.next();
const tokens = tokenize(inputString);
const ast = parse(name, tokens, crateId);
const resolved = resolve(gcx, ast);
console.log(resolved);
const typecked = typeck(gcx, resolved);
gcx.depCrates.push(typecked);
return typecked;
},
() => {
throw new CompilerError(
`failed to load crate ${name}: crate contains errors`,
span
);
}
);
};
main();

View file

@ -1,4 +1,4 @@
import { CompilerError, Span } from "./error";
import { CompilerError, LoadedFile, Span } from "./error";
export type DatalessToken =
| "function"
@ -85,13 +85,14 @@ const SINGLE_PUNCT: string[] = [
"%",
];
export function tokenize(input: string): Token[] {
export function tokenize(file: LoadedFile): Token[] {
const { content: input } = file;
const tokens: Token[] = [];
let i = 0;
finish: while (i < input.length) {
const next = input[i];
const span: Span = { start: i, end: i + 1 };
const span: Span = { start: i, end: i + 1, file };
if (next === "/" && input[i + 1] === "/") {
while (input[i] !== "\n") {
@ -205,7 +206,7 @@ export function tokenize(input: string): Token[] {
default:
throw new CompilerError(
`invalid escape character: ${input[i]}`,
{ start: span.end - 1, end: span.end }
{ start: span.end - 1, end: span.end, file }
);
}
continue;

77
src/loader.ts Normal file
View file

@ -0,0 +1,77 @@
import { DepCrate } from "./ast";
import { CrateLoader, GlobalContext } from "./context";
import { CompilerError, LoadedFile, Span, withErrorPrinter } from "./error";
import fs from "fs";
import path from "path";
import { tokenize } from "./lexer";
import { ParseState, parse } from "./parser";
import { resolve } from "./resolve";
import { typeck } from "./typeck";
/**
 * Locate and read the source file for a module.
 *
 * Candidates are probed in order and the first one that can be read wins:
 *   1. `<relativeTo>/<moduleName>.nil`
 *   2. `<relativeTo>/<moduleName>/<moduleName>.mod.nil`
 *
 * @param relativeTo Directory the candidate paths are resolved against.
 * @param moduleName Name of the module to load.
 * @param span Source location blamed in the error if no candidate is readable.
 * @returns The file's content together with the path it was read from.
 * @throws CompilerError if none of the candidates can be read.
 */
export function loadModuleFile(
  relativeTo: string,
  moduleName: string,
  span: Span
): LoadedFile {
  const candidates = [
    path.join(relativeTo, `${moduleName}.nil`),
    path.join(relativeTo, moduleName, `${moduleName}.mod.nil`),
  ];

  for (const candidate of candidates) {
    let content: string;
    try {
      content = fs.readFileSync(candidate, { encoding: "utf-8" });
    } catch {
      // Best effort: any read failure just means "try the next candidate".
      continue;
    }
    // Stop at the first hit so the listed order is the precedence order and
    // later candidates are never read needlessly.
    return { content, path: candidate };
  }

  throw new CompilerError(
    `failed to load ${moduleName}, could not find ${candidates.join(" or ")}`,
    span
  );
}
/**
 * Default {@link CrateLoader}: resolve a dependency crate by name.
 *
 * A crate already present in `gcx.depCrates` is returned as-is. Otherwise the
 * crate's root file is looked up relative to the current working directory,
 * then tokenized, parsed, resolved and type-checked; the result is appended
 * to `gcx.depCrates` and returned.
 *
 * @param gcx Global compiler context holding loaded crates and the crate ID counter.
 * @param name Package name of the crate to load.
 * @param span Location of the reference to the crate, used for error reporting.
 * @throws CompilerError if the file cannot be found or the crate contains errors.
 */
export const loadCrate: CrateLoader = (
  gcx: GlobalContext,
  name: string,
  span: Span
): DepCrate => {
  // Crate discovery is intentionally naive for now: reuse an already loaded
  // crate, otherwise look for a matching file in the CWD.
  const cached = gcx.depCrates.find((dep) => dep.packageName === name);
  if (cached) {
    return cached;
  }

  const file = loadModuleFile(".", name, span);

  // Runs the front-end pipeline for the dependency and registers the result.
  const build = (): DepCrate => {
    const crateId = gcx.crateId.next();
    const parseState: ParseState = { tokens: tokenize(file), file };
    const resolved = resolve(gcx, parse(name, parseState, crateId));
    console.log(resolved);

    const checked = typeck(gcx, resolved);
    gcx.depCrates.push(checked);
    return checked;
  };

  // The error printer has already rendered the underlying error; report the
  // failure again at the site that requested the crate.
  const fail = (): never => {
    throw new CompilerError(
      `failed to load crate ${name}: crate contains errors`,
      span
    );
  };

  return withErrorPrinter(build, fail);
};

View file

@ -32,33 +32,36 @@ import {
GlobalItem,
StructLiteralField,
} from "./ast";
import { CompilerError, EOF_SPAN, Span, spanMerge } from "./error";
import { CompilerError, eofSpan, LoadedFile, Span, spanMerge } from "./error";
import { BaseToken, Token, TokenIdent, TokenLitString } from "./lexer";
import { ComplexMap, ComplexSet, Ids } from "./utils";
type Parser<T> = (t: Token[]) => [Token[], T];
export type ParseState = { tokens: Token[]; file: LoadedFile };
type State = ParseState;
type Parser<T> = (t: State) => [State, T];
export function parse(
packageName: string,
t: Token[],
t: State,
crateId: number
): Crate<Built> {
const items: Item<Parsed>[] = [];
while (t.length > 0) {
while (t.tokens.length > 0) {
let item;
[t, item] = parseItem(t);
items.push(item);
}
const ast: Crate<Built> = buildCrate(packageName, items, crateId);
const ast: Crate<Built> = buildCrate(packageName, items, crateId, t.file);
validateAst(ast);
return ast;
}
function parseItem(t: Token[]): [Token[], Item<Parsed>] {
function parseItem(t: State): [State, Item<Parsed>] {
let tok;
[t, tok] = next(t);
if (tok.kind === "function") {
@ -211,7 +214,7 @@ type FunctionSig = {
returnType?: Type<Parsed>;
};
function parseFunctionSig(t: Token[]): [Token[], FunctionSig] {
function parseFunctionSig(t: State): [State, FunctionSig] {
let name;
[t, name] = expectNext<TokenIdent>(t, "identifier");
@ -238,7 +241,7 @@ function parseFunctionSig(t: Token[]): [Token[], FunctionSig] {
return [t, { name: name.ident, params, returnType }];
}
function parseExpr(t: Token[]): [Token[], Expr<Parsed>] {
function parseExpr(t: State): [State, Expr<Parsed>] {
/*
EXPR = ASSIGNMENT
@ -284,7 +287,7 @@ function mkParserExprBinary(
kinds: string[],
mkExpr = mkBinaryExpr
): Parser<Expr<Parsed>> {
function parser(t: Token[]): [Token[], Expr<Parsed>] {
function parser(t: State): [State, Expr<Parsed>] {
let lhs;
[t, lhs] = lower(t);
@ -328,7 +331,7 @@ const parseExprAssignment = mkParserExprBinary(
(lhs, rhs, span) => ({ kind: "assign", lhs, rhs, span })
);
function parseExprUnary(t: Token[]): [Token[], Expr<Parsed>] {
function parseExprUnary(t: State): [State, Expr<Parsed>] {
const peek = peekKind(t);
if (peek && UNARY_KINDS.includes(peek as UnaryKind)) {
let tok: Token;
@ -348,7 +351,7 @@ function parseExprUnary(t: Token[]): [Token[], Expr<Parsed>] {
return parseExprCall(t);
}
function parseExprCall(t: Token[]): [Token[], Expr<Parsed>] {
function parseExprCall(t: State): [State, Expr<Parsed>] {
let lhs: Expr<Parsed>;
[t, lhs] = parseExprAtom(t);
@ -385,7 +388,7 @@ function parseExprCall(t: Token[]): [Token[], Expr<Parsed>] {
return [t, lhs];
}
function parseExprAtom(startT: Token[]): [Token[], Expr<Parsed>] {
function parseExprAtom(startT: State): [State, Expr<Parsed>] {
// eslint-disable-next-line prefer-const
let [t, tok] = next(startT);
const span = tok.span;
@ -536,8 +539,8 @@ function parseExprAtom(startT: Token[]): [Token[], Expr<Parsed>] {
}
function parseStructInit(
t: Token[]
): [Token[], ExprStructLiteral<Parsed>["fields"]] {
t: State
): [State, ExprStructLiteral<Parsed>["fields"]] {
[t] = expectNext(t, "{");
let fields;
@ -558,7 +561,7 @@ function parseStructInit(
return [t, fields];
}
function parseType(t: Token[]): [Token[], Type<Parsed>] {
function parseType(t: State): [State, Type<Parsed>] {
let tok;
[t, tok] = next(t);
const span = tok.span;
@ -619,10 +622,10 @@ function parseType(t: Token[]): [Token[], Type<Parsed>] {
// helpers
function parseCommaSeparatedList<R>(
t: Token[],
t: State,
terminator: Token["kind"],
parser: Parser<R>
): [Token[], R[]] {
): [State, R[]] {
const items: R[] = [];
// () | (a) | (a,) | (a, b)
@ -651,29 +654,29 @@ function parseCommaSeparatedList<R>(
}
function eat<T extends BaseToken>(
t: Token[],
t: State,
kind: T["kind"]
): [Token[], T | undefined] {
): [State, T | undefined] {
if (peekKind(t) === kind) {
return expectNext(t, kind);
}
return [t, undefined];
}
function peekKind(t: Token[]): Token["kind"] | undefined {
function peekKind(t: State): Token["kind"] | undefined {
return maybeNextT(t)?.[1]?.kind;
}
function expectNext<T extends BaseToken>(
t: Token[],
t: State,
kind: T["kind"]
): [Token[], T & Token] {
): [State, T & Token] {
let tok;
[t, tok] = maybeNextT(t);
if (!tok) {
throw new CompilerError(
`expected \`${kind}\`, found end of file`,
EOF_SPAN
eofSpan(t.file)
);
}
if (tok.kind !== kind) {
@ -685,18 +688,19 @@ function expectNext<T extends BaseToken>(
return [t, tok as unknown as T & Token];
}
function next(t: Token[]): [Token[], Token] {
function next(t: State): [State, Token] {
const [rest, next] = maybeNextT(t);
if (!next) {
throw new CompilerError("unexpected end of file", EOF_SPAN);
throw new CompilerError("unexpected end of file", eofSpan(t.file));
}
return [rest, next];
}
function maybeNextT(t: Token[]): [Token[], Token | undefined] {
const next = t[0];
const rest = t.slice(1);
return [rest, next];
function maybeNextT(t: State): [State, Token | undefined] {
const next = t.tokens[0];
const rest = t.tokens.slice(1);
return [{ ...t, tokens: rest }, next];
}
function unexpectedToken(token: Token, expected: string): never {
@ -769,7 +773,8 @@ function validateAst(ast: Crate<Built>) {
function buildCrate(
packageName: string,
rootItems: Item<Parsed>[],
crateId: number
crateId: number,
rootFile: LoadedFile
): Crate<Built> {
const itemId = new Ids();
itemId.next(); // crate root ID
@ -780,6 +785,7 @@ function buildCrate(
rootItems,
itemsById: new ComplexMap(),
packageName,
rootFile,
};
const assigner: Folder<Parsed, Built> = {

View file

@ -74,6 +74,7 @@ export function resolve(
itemsById: cx.newItemsById,
rootItems,
packageName: ast.packageName,
rootFile: ast.rootFile,
};
}

View file

@ -423,6 +423,7 @@ export function typeck(
throw new CompilerError(`\`main\` function not found`, {
start: 0,
end: 1,
file: ast.rootFile,
});
}