This commit is contained in:
nora 2023-07-23 14:02:53 +02:00
parent 91b183c002
commit 4e95bc05a3
9 changed files with 640 additions and 132 deletions

View file

@ -1,5 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */ /** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = { module.exports = {
preset: 'ts-jest', preset: "ts-jest",
testEnvironment: 'node', testEnvironment: "node",
}; };

View file

@ -1,33 +1,116 @@
import { Span } from "./error"; import { Span } from "./error";
export type ItemKind = { export type ItemKind = {
kind: "function", kind: "function";
node: FunctionDef, node: FunctionDef;
}; };
export type Item = ItemKind & { export type Item = ItemKind & {
span: Span, span: Span;
} };
export type FunctionDef = { export type FunctionDef = {
name: string, name: string;
args: FunctionArg[], args: FunctionArg[];
body: Expr, body: Expr;
} };
export type FunctionArg = { export type FunctionArg = {
name: string, name: string;
span: Span, span: Span;
} };
export type ExprKind = { export type ExprKind =
kind: "lit_string", | { kind: "empty" }
value: string, | { kind: "let"; name: string; rhs: Expr; after: Expr }
} | { | { kind: "block"; exprs: Expr[] }
kind: "ident", | {
value: string, kind: "literal";
} value: Literal;
}
| {
kind: "ident";
value: string;
}
| {
kind: "binary";
binaryKind: BinaryKind;
lhs: Expr;
rhs: Expr;
}
| {
kind: "unary",
unaryKind: UnaryKind,
rhs: Expr,
}
| {
kind: "call",
lhs: Expr,
args: Expr[],
};
export type Expr = ExprKind & { export type Expr = ExprKind & {
span: Span, span: Span;
};
export type Literal =
| {
kind: "str";
value: string;
}
| {
kind: "int";
value: number;
};
/** Every binary operator that can appear in a `binary` expression node. */
export type BinaryKind =
  | "+"
  | "-"
  | "*"
  | "/"
  | "&"
  | "|"
  | "<"
  | ">"
  | "=="
  | "<="
  | ">="
  | "!=";

/** Operators accepted at the comparison level of the expression grammar. */
export const COMPARISON_KINDS: BinaryKind[] = [
  ">",
  "<",
  "==",
  "<=",
  ">=",
  "!=",
];

/** Operators accepted at the logical level of the expression grammar. */
export const LOGICAL_KINDS: BinaryKind[] = ["&", "|"];

/** Operators accepted at the additive level of the expression grammar. */
export const ARITH_TERM_KINDS: BinaryKind[] = ["+", "-"];

/** Operators accepted at the multiplicative level of the expression grammar. */
export const ARITH_FACTOR_KINDS: BinaryKind[] = ["*", "/"];

// Precedence class of each operator. All four arithmetic operators share
// class 0; every other operator has a class of its own.
const BINARY_KIND_PREC_CLASS = new Map<BinaryKind, number>([
  ["+", 0],
  ["-", 0],
  ["*", 0],
  ["/", 0],
  ["&", 1],
  ["|", 2],
  ["<", 3],
  [">", 4],
  ["==", 5],
  ["<=", 6],
  [">=", 7],
  ["!=", 8],
]);

/**
 * Looks up the precedence class of a binary operator.
 *
 * @param k - The operator to classify.
 * @returns The class number recorded for `k` in the precedence table.
 * @throws Error if `k` has no entry in the table.
 */
export function binaryExprPrecedenceClass(k: BinaryKind): number {
  const cls = BINARY_KIND_PREC_CLASS.get(k);
  // Compare against `undefined` explicitly: class 0 is a valid result, and a
  // truthiness check would reject every arithmetic operator.
  if (cls === undefined) {
    throw new Error(`Invalid binary kind: ${k}`);
  }
  return cls;
}
/** Operators that can appear in a `unary` expression node. */
export type UnaryKind = "!" | "-";

/** Runtime list of every `UnaryKind`, for membership checks. */
export const UNARY_KINDS: UnaryKind[] = ["!", "-"];

View file

@ -3,6 +3,13 @@ export type Span = {
end: number; end: number;
}; };
/**
 * Combines two spans into one that covers both of them.
 *
 * @param a - First span.
 * @param b - Second span; the order of the two arguments does not matter.
 * @returns A span from the smaller `start` to the larger `end`.
 */
export function spanMerge(a: Span, b: Span): Span {
  const start = Math.min(a.start, b.start);
  const end = Math.max(a.end, b.end);
  return { start, end };
}
export class CompilerError extends Error { export class CompilerError extends Error {
msg: string; msg: string;
span: Span; span: Span;

View file

@ -1,18 +1,30 @@
import { withErrorHandler } from "./error"; import { withErrorHandler } from "./error";
import { tokenize } from "./lexer"; import { tokenize } from "./lexer";
import { parse } from "./parser"; import { parse } from "./parser";
import { printAst } from "./printer";
const input = ` const input = `
function hello() {} function main() = (
print("Hello, world!");
"uwu";
);
`; `;
/**
 * Runs the demo pipeline on `input` inside `withErrorHandler`: tokenizes it,
 * parses the tokens, and logs the tokens, the raw AST and the pretty-printed
 * AST, each under its own banner line.
 */
function main() {
  const runPipeline = () => {
    const tokens = tokenize(input);
    console.log("-----TOKENS---");
    console.log(tokens);

    const ast = parse(tokens);
    console.log("-----AST------");
    // Raise the inspection depth so nested expression nodes are not elided.
    console.dir(ast, { depth: 10 });

    const printed = printAst(ast);
    console.log("-----AST pretty------");
    console.log(printed);
  };

  withErrorHandler(input, runPipeline);
}

View file

@ -1,17 +1,17 @@
import { tokenize } from "./lexer"; import { tokenize } from "./lexer";
// The lexer no longer produces tokens for `{` and `}` and rejects them as
// invalid characters, so the empty function body is written as `()`.
// The stored snapshot for this test has to be regenerated.
it("should tokenize an emtpy function", () => {
  const input = `function hello() = ();`;

  const tokens = tokenize(input);

  expect(tokens).toMatchSnapshot();
});
it('should tokenize hello world', () => { it("should tokenize hello world", () => {
const input = `print("hello world")`; const input = `print("hello world")`;
const tokens = tokenize(input); const tokens = tokenize(input);
expect(tokens).toMatchSnapshot(); expect(tokens).toMatchSnapshot();
}); });

View file

@ -1,23 +1,62 @@
import { CompilerError, Span } from "./error"; import { CompilerError, Span } from "./error";
/**
 * Kinds of tokens that carry no payload besides their span: keywords,
 * punctuation and operators. Each kind is spelled exactly like its source
 * text.
 */
export type DatalessToken =
  | "function"
  | "let"
  | "in"
  | "("
  | ")"
  | ";"
  | ","
  | "="
  | "+"
  | "-"
  | "*"
  | "/"
  | "&"
  | "|"
  | "!"
  | "<"
  | ">"
  | "=="
  | "<="
  | ">="
  | "!=";
export type TokenKind = export type TokenIdent = { kind: "identifier"; ident: string };
| { kind: DatalessToken }
| { kind: "identifier"; ident: string } export type TokenLit =
| { kind: "lit_string"; value: string }; | {
kind: "lit_string";
value: string;
}
| {
kind: "lit_int";
value: number;
};
export type TokenKind = { kind: DatalessToken } | TokenIdent | TokenLit;
export type Token = TokenKind & { export type Token = TokenKind & {
span: Span; span: Span;
}; };
export type BaseToken = { kind: Token["kind"] };
// Characters that the tokenizer turns into a token on their own, without
// looking at the following character. Spreading the string yields one array
// entry per character.
const SINGLE_PUNCT: string[] = [..."();,+-*/&|"];
export function tokenize(input: string): Token[] { export function tokenize(input: string): Token[] {
const tokens: Token[] = []; const tokens: Token[] = [];
let i = 0; let i = 0;
@ -25,65 +64,102 @@ export function tokenize(input: string): Token[] {
finish: while (i < input.length) { finish: while (i < input.length) {
const next = input[i]; const next = input[i];
const span: Span = { start: i, end: i + 1 }; const span: Span = { start: i, end: i + 1 };
switch (next) {
case undefined: { if (SINGLE_PUNCT.includes(next)) {
break finish; tokens.push({ kind: next as DatalessToken, span });
} } else {
case "(": { switch (next) {
tokens.push({ kind: "p_popen", span }); case undefined: {
break; break finish;
}
case ")": {
tokens.push({ kind: "p_pclose", span });
break;
}
case "{": {
tokens.push({ kind: "p_bopen", span });
break;
}
case "}": {
tokens.push({ kind: "p_bclose", span });
break;
}
case ";": {
tokens.push({ kind: "p_semi", span });
break;
}
case '"': {
while (true) {
const next = input[i + 1];
span.end++;
i++;
if (next === '"') {
break;
}
if (next === undefined) {
throw new CompilerError(`Unterminated string literal`, span);
}
} }
const value = input.slice(span.start + 1, span.end - 1); case "=": {
tokens.push({ kind: "lit_string", span, value }); if (input[i + 1] === "=") {
break;
}
default: {
if (isDigit(next)) {
throw new Error("digit");
} else if (isIdentStart(next)) {
while (isIdentContinue(input[i + 1])) {
span.end++; span.end++;
i++; i++;
} tokens.push({ kind: "==", span });
const ident = input.slice(span.start, span.end);
let kw = isKeyword(ident);
if (kw) {
tokens.push({ kind: kw, span });
} else { } else {
tokens.push({ kind: "identifier", span, ident: ident }); tokens.push({ kind: "=", span });
}
break;
}
case ">": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: ">=", span });
} else {
tokens.push({ kind: ">", span });
}
break;
}
case "<": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: "<=", span });
} else {
tokens.push({ kind: "<", span });
}
break;
}
case "!": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: "!=", span });
} else {
tokens.push({ kind: "!", span });
}
break;
}
case '"': {
while (true) {
const next = input[i + 1];
span.end++;
i++;
if (next === '"') {
break;
}
if (next === undefined) {
throw new CompilerError(`Unterminated string literal`, span);
}
}
const value = input.slice(span.start + 1, span.end - 1);
tokens.push({ kind: "lit_string", span, value });
break;
}
default: {
if (isDigit(next)) {
while (isDigit(input[i + 1])) {
span.end++;
i++;
}
const digit = input.slice(span.start, span.end);
const int = parseInt(digit, 10);
if (Number.isNaN(int)) {
throw new Error(
`\`${digit}\` was tokenized to a number even though it is not`
);
}
tokens.push({ kind: "lit_int", value: int, span });
} else if (isIdentStart(next)) {
while (isIdentContinue(input[i + 1])) {
span.end++;
i++;
}
const ident = input.slice(span.start, span.end);
let kw = isKeyword(ident);
if (kw) {
tokens.push({ kind: kw, span });
} else {
tokens.push({ kind: "identifier", span, ident: ident });
}
} else if (isWhitespace(next)) {
// ignore
} else {
throw new CompilerError(`Invalid character: \`${next}\``, span);
} }
} else if (isWhitespace(next)) {
// ignore
} else {
throw new CompilerError(`Invalid character: \`${next}\``, span);
} }
} }
} }
@ -117,10 +193,7 @@ function isWhitespace(char: string): boolean {
return char === " " || char === "\t" || char === "\n" || char === "\r"; return char === " " || char === "\t" || char === "\n" || char === "\r";
} }
// Reserved words. A keyword's token kind is the keyword text itself.
const keywords = new Set<string>(["function", "let", "in"]);

/**
 * Classifies an identifier-shaped word.
 *
 * @param kw - The word to look up.
 * @returns `kw` typed as a token kind when it is a reserved word, otherwise
 *   `undefined`.
 */
function isKeyword(kw: string): DatalessToken | undefined {
  if (!keywords.has(kw)) {
    return undefined;
  }
  return kw as DatalessToken;
}

View file

@ -1,45 +1,282 @@
import { FunctionDef, Item } from "./ast"; import {
ARITH_FACTOR_KINDS,
ARITH_TERM_KINDS,
BinaryKind,
COMPARISON_KINDS,
Expr,
FunctionDef,
Item,
LOGICAL_KINDS,
UNARY_KINDS,
UnaryKind,
} from "./ast";
import { CompilerError, todo } from "./error"; import { CompilerError, todo } from "./error";
import { Token } from "./lexer"; import { BaseToken, Token, TokenIdent } from "./lexer";
type Parser<T> = (t: Token[]) => [Token[], T];
/**
 * Parses a complete token stream into its top-level items.
 *
 * @param t - Tokens to parse.
 * @returns The items in source order; an empty array for an empty stream.
 */
export function parse(t: Token[]): Item[] {
  const items: Item[] = [];
  let remaining = t;

  // Every call to parseItem returns the tokens it did not consume.
  while (remaining.length > 0) {
    const [afterItem, item] = parseItem(remaining);
    items.push(item);
    remaining = afterItem;
  }

  return items;
}
/**
 * Parses one top-level item. The only item form is a function definition:
 * `function NAME ( ) = EXPR ;`. The parameter list must be empty, so `args`
 * is always `[]`.
 *
 * @param t - Tokens starting at the item.
 * @returns The unconsumed tokens and the parsed item. The item's span is the
 *   span of the `function` keyword.
 * @throws CompilerError when the first token is not `function` or a later
 *   token does not match the expected form.
 */
function parseItem(t: Token[]): [Token[], Item] {
  const [afterKeyword, tok] = next(t);
  if (tok.kind !== "function") {
    return unexpectedToken(tok);
  }

  const [afterName, name] = expectNext<TokenIdent>(afterKeyword, "identifier");
  const [afterOpen] = expectNext(afterName, "(");
  const [afterClose] = expectNext(afterOpen, ")");
  const [afterEquals] = expectNext(afterClose, "=");
  const [afterBody, body] = parseExpr(afterEquals);
  const [rest] = expectNext(afterBody, ";");

  const def: FunctionDef = {
    name: name.ident,
    args: [],
    body,
  };

  return [rest, { kind: "function", node: def, span: tok.span }];
}
/**
 * Parses one expression: either a `let NAME = EXPR in EXPR` binding or an
 * operator expression starting at the comparison level.
 *
 * @param t - Tokens starting at the expression.
 * @returns The unconsumed tokens and the parsed expression. A `let` node's
 *   span is the span of the `let` keyword.
 * @throws CompilerError on end of input or when a `let` is malformed.
 */
function parseExpr(t: Token[]): [Token[], Expr] {
  /*
  EXPR = { "let" NAME "=" EXPR "in" EXPR | COMPARISON }

  // The precedence here is pretty arbitrary since we forbid mixing of operators
  // with different precedence classes anyways.
  COMPARISON = LOGICAL { ( ">" | "<" | "==" | "<=" | ">=" | "!=" ) COMPARISON }
  LOGICAL = ARITH_TERM { ( "&" | "|" ) LOGICAL }

  // Here it matters though.
  ARITH_TERM = ARITH_FACTOR { ( "+" | "-" ) ARITH_TERM }
  ARITH_FACTOR = UNARY { ( "*" | "/" ) ARITH_FACTOR }

  UNARY = { "!" | "-" } CALL
  CALL = ATOM { "(" EXPR_LIST ")" }

  ATOM = "(" { EXPR ";" } EXPR ")" | IDENT | LITERAL | EMPTY
  EMPTY =
  EXPR_LIST = { EXPR { "," EXPR } { "," } }
  */
  const [, peak] = next(t);

  if (peak.kind === "let") {
    [t] = next(t);
    let name;
    [t, name] = expectNext<TokenIdent>(t, "identifier");
    // Keep the returned token list: dropping it would leave `=` unconsumed
    // and the right-hand side would start parsing at the `=` token.
    [t] = expectNext(t, "=");
    let rhs;
    [t, rhs] = parseExpr(t);
    // Same here for the `in` keyword.
    [t] = expectNext(t, "in");
    let after;
    [t, after] = parseExpr(t);

    // Use the `let` keyword for the span. Whatever token follows the whole
    // expression is unrelated to it and may not exist at end of input.
    return [
      t,
      { kind: "let", name: name.ident, rhs, after, span: peak.span },
    ];
  }

  return parseExprComparison(t);
}
/**
 * Builds the parser for one level of binary operators.
 *
 * The returned parser reads an operand with `lower`. If the next token's kind
 * is listed in `kinds`, it consumes that token and calls itself for the
 * right-hand side; otherwise it returns the operand unchanged.
 *
 * NOTE(review): because the right-hand side is parsed by the same level, a
 * chain such as `a - b - c` nests to the right (`a - (b - c)`). Confirm that
 * this is intended.
 *
 * @param lower - Parser for the operands of this level.
 * @param kinds - Token kinds treated as operators of this level.
 * @returns A parser yielding the operand or a `binary` node whose span is the
 *   operator token's span.
 * @throws CompilerError (via `next`) when no token follows the left operand.
 */
function mkParserExprBinary(
  lower: Parser<Expr>,
  kinds: string[]
): Parser<Expr> {
  const parseLevel: Parser<Expr> = (tokens) => {
    const [afterLhs, lhs] = lower(tokens);
    const [afterOperator, operator] = next(afterLhs);

    if (!kinds.includes(operator.kind)) {
      // Not an operator of this level: leave the token for the caller.
      return [afterLhs, lhs];
    }

    const [rest, rhs] = parseLevel(afterOperator);
    return [
      rest,
      {
        kind: "binary",
        binaryKind: operator.kind as BinaryKind,
        lhs,
        rhs,
        span: operator.span,
      },
    ];
  };

  return parseLevel;
}
// One parser per binary-operator level. Each level takes its operands from
// the parser passed as the first argument, so the chain below runs from the
// innermost level (factor) to the outermost (comparison).

// `*` and `/`; operands are unary expressions.
const parseExprArithFactor = mkParserExprBinary(
parseExprUnary,
ARITH_FACTOR_KINDS
);
// `+` and `-`; operands are factor-level expressions.
const parseExprArithTerm = mkParserExprBinary(
parseExprArithFactor,
ARITH_TERM_KINDS
);
// `&` and `|`; operands are term-level expressions.
const parseExprLogical = mkParserExprBinary(parseExprArithTerm, LOGICAL_KINDS);
// `>`, `<`, `==`, `<=`, `>=` and `!=`; operands are logical-level expressions.
const parseExprComparison = mkParserExprBinary(
parseExprLogical,
COMPARISON_KINDS
);
/**
 * Parses zero or more prefix operators (`!`, `-`) followed by a call
 * expression.
 *
 * @param t - Tokens starting at the expression.
 * @returns The unconsumed tokens and either a `unary` node (whose span is the
 *   operator token's span) or the result of `parseExprCall`.
 * @throws CompilerError on end of input.
 */
function parseExprUnary(t: Token[]): [Token[], Expr] {
  const [afterOperator, peak] = next(t);

  // Test membership by value. The `in` operator on an array checks indices
  // ("0", "1", ...), so it never matches an operator kind.
  if (UNARY_KINDS.some((kind) => kind === peak.kind)) {
    // Continue after the operator token; re-parsing from `t` would see the
    // same operator again and recurse forever.
    const [rest, rhs] = parseExprUnary(afterOperator);
    return [
      rest,
      {
        kind: "unary",
        unaryKind: peak.kind as UnaryKind,
        rhs,
        span: peak.span,
      },
    ];
  }

  return parseExprCall(t);
}
/**
 * Parses an atom followed by any number of call suffixes `( EXPR_LIST )`,
 * so `f(a)(b)` becomes a call whose callee is the call `f(a)`.
 *
 * Arguments are separated by `,` and a trailing comma is accepted.
 *
 * @param t - Tokens starting at the expression.
 * @returns The unconsumed tokens and the atom or the outermost `call` node.
 *   A call's span is the span of its opening parenthesis.
 * @throws CompilerError on end of input or when an argument list is not
 *   closed by `)`.
 */
function parseExprCall(t: Token[]): [Token[], Expr] {
  let lhs: Expr;
  [t, lhs] = parseExprAtom(t);

  while (next(t)[1].kind === "(") {
    let popen;
    [t, popen] = next(t);

    const args: Expr[] = [];
    while (next(t)[1].kind !== ")") {
      let arg;
      [t, arg] = parseExpr(t);
      args.push(arg);

      let comma;
      [t, comma] = eat(t, ",");
      if (comma === undefined) {
        // Without a separator the list must end here. Stopping also
        // guarantees progress: an argument can parse as `empty` without
        // consuming anything, which would otherwise loop forever.
        break;
      }
    }
    [t] = expectNext(t, ")");

    lhs = { kind: "call", span: popen.span, lhs, args };
  }

  return [t, lhs];
}
/**
 * Parses the innermost expression form, chosen by the first token:
 * a parenthesised block `( EXPR ; EXPR ; ... )`, a string or integer
 * literal, or an identifier. Any other token yields an `empty` node and
 * consumes nothing.
 *
 * @param startT - Tokens starting at the atom.
 * @returns The unconsumed tokens and the parsed node. The node's span is the
 *   first token's span (for a block, its opening parenthesis).
 * @throws CompilerError on end of input or when a block is malformed.
 */
function parseExprAtom(startT: Token[]): [Token[], Expr] {
  const [afterTok, tok] = next(startT);

  switch (tok.kind) {
    case "(": {
      let [t, expr] = parseExpr(afterTok);
      const exprs = [expr];

      // Every further expression must be introduced by a `;`.
      while (next(t)[1].kind !== ")") {
        [t] = expectNext(t, ";");
        [t, expr] = parseExpr(t);
        exprs.push(expr);
      }
      [t] = expectNext(t, ")");

      return [t, { kind: "block", span: tok.span, exprs }];
    }
    case "lit_string": {
      return [
        afterTok,
        {
          kind: "literal",
          span: tok.span,
          value: { kind: "str", value: tok.value },
        },
      ];
    }
    case "lit_int": {
      return [
        afterTok,
        {
          kind: "literal",
          span: tok.span,
          value: { kind: "int", value: tok.value },
        },
      ];
    }
    case "identifier": {
      return [afterTok, { kind: "ident", span: tok.span, value: tok.ident }];
    }
    default: {
      // Parse nothing at all: hand back the original, unconsumed tokens.
      return [startT, { kind: "empty", span: tok.span }];
    }
  }
}
// helpers
/**
 * Consumes the next token only if it has the requested kind.
 *
 * @param t - Tokens to read from.
 * @param kind - Token kind to accept.
 * @returns The tokens after the match together with the matched token, or
 *   the unchanged tokens and `undefined` when the next token differs.
 * @throws CompilerError (via `next`) when `t` is empty.
 */
function eat<T extends BaseToken>(
  t: Token[],
  kind: T["kind"]
): [Token[], T | undefined] {
  const [rest, candidate] = next(t);
  return candidate.kind === kind
    ? [rest, candidate as unknown as T]
    : [t, undefined];
}
/**
 * Consumes the next token and requires it to have the given kind.
 *
 * @param t - Tokens to read from.
 * @param kind - Token kind that must come next.
 * @returns The tokens after the consumed one, and that token.
 * @throws CompilerError when `t` is empty or the token has another kind.
 */
function expectNext<T extends BaseToken>(
  t: Token[],
  kind: T["kind"]
): [Token[], T] {
  const [rest, tok] = next(t);
  return [rest, expectToken<T>(kind, tok)];
}
/**
 * Takes the first token off the list.
 *
 * @param t - Tokens to read from.
 * @returns The remaining tokens and the removed token.
 * @throws CompilerError "unexpected end of file" when `t` is empty. No real
 *   position exists in that case, so the span uses `Number.MAX_SAFE_INTEGER`
 *   for both ends.
 */
function next(t: Token[]): [Token[], Token] {
  const [rest, tok] = maybeNextT(t);
  if (tok) {
    return [rest, tok];
  }

  throw new CompilerError("unexpected end of file", {
    start: Number.MAX_SAFE_INTEGER,
    end: Number.MAX_SAFE_INTEGER,
  });
}
/**
 * Splits the token list into its first token and the rest, without failing
 * on an empty list.
 *
 * @param t - Tokens to read from; not modified.
 * @returns A new array with the remaining tokens, and the first token or
 *   `undefined` when `t` is empty.
 */
function maybeNextT(t: Token[]): [Token[], Token | undefined] {
  const [first, ...rest] = t;
  return [rest, first];
}
/**
 * Reports a token that is not valid at its position.
 *
 * @param token - The offending token; its span is attached to the error.
 * @throws CompilerError always.
 */
function unexpectedToken(token: Token): never {
  const { span } = token;
  throw new CompilerError("unexpected token", span);
}
/**
 * Checks that a token has the expected kind.
 *
 * @param kind - The required token kind.
 * @param token - The token to check.
 * @returns `token`, typed as `T`.
 * @throws CompilerError naming the expected and the found kind when they
 *   differ.
 */
function expectToken<T extends BaseToken>(kind: T["kind"], token: Token): T {
  if (token.kind === kind) {
    return token as unknown as T;
  }

  throw new CompilerError(
    `expected ${kind}, found ${token.kind}`,
    token.span
  );
}

96
src/printer.ts Normal file
View file

@ -0,0 +1,96 @@
import { Expr, FunctionDef, Item } from "./ast";
/**
 * Renders a list of items as source text.
 *
 * @param ast - Items to print.
 * @returns The printed items joined by newlines; `""` for an empty list.
 */
export function printAst(ast: Item[]): string {
  const printedItems: string[] = [];
  for (const item of ast) {
    printedItems.push(printItem(item));
  }
  return printedItems.join("\n");
}
/**
 * Renders one top-level item by dispatching on its kind.
 *
 * @param item - The item to print.
 * @returns The item's source text.
 */
function printItem(item: Item): string {
  switch (item.kind) {
    case "function":
      return printFunction(item.node);
  }
}
/**
 * Renders a function definition as `function NAME(ARGS) = BODY`, with the
 * argument names separated by `, ` and the body printed at indent level 0.
 *
 * NOTE(review): the output has no trailing `;`, although the parser requires
 * one after a function body. Confirm whether printed code is meant to be
 * re-parseable.
 *
 * @param func - The function definition to print.
 * @returns The function's source text.
 */
function printFunction(func: FunctionDef): string {
  const argNames = func.args.map((arg) => arg.name);
  const body = printExpr(func.body, 0);
  return `function ${func.name}(${argNames.join(", ")}) = ${body}`;
}
/**
 * Renders an expression as source text.
 *
 * Blocks and call argument lists stay on one line while the combined length
 * of their printed parts is below 40 characters; otherwise every part goes
 * on its own line, one indent level deeper.
 *
 * @param expr - The expression to print.
 * @param indent - Indent level of the line the expression starts on.
 * @returns The expression's source text; `""` for an `empty` node.
 */
function printExpr(expr: Expr, indent: number): string {
  switch (expr.kind) {
    case "empty": {
      return "";
    }
    case "let": {
      // The bound value starts on the `let` line, so it shares that line's
      // indent level; a multi-line value then closes in line with `let`.
      const rhs = printExpr(expr.rhs, indent);
      // The expression after `in` is part of the node and must be printed
      // too, on the following line at the same level.
      const after = printExpr(expr.after, indent);
      return `let ${expr.name} = ${rhs} in${linebreak(indent)}${after}`;
    }
    case "block": {
      const exprs = expr.exprs.map((inner) => printExpr(inner, indent + 1));

      if (exprs.length === 1) {
        return `(${exprs[0]})`;
      }

      const shortExprs =
        exprs.map((s) => s.length).reduce((a, b) => a + b, 0) < 40;
      if (shortExprs) {
        // A trailing `empty` expression prints as "", so the "; " joiner
        // already leaves a space before the closing parenthesis.
        const alreadyHasTrailingSpace =
          expr.exprs[exprs.length - 1]?.kind === "empty";
        const trailingSpace = alreadyHasTrailingSpace ? "" : " ";
        return `( ${exprs.join("; ")}${trailingSpace})`;
      } else {
        const joiner = `;${linebreak(indent + 1)}`;
        return (
          `(${linebreak(indent + 1)}` +
          `${exprs.join(joiner)}` +
          `${linebreak(indent)})`
        );
      }
    }
    case "literal": {
      const literal = expr.value;
      return literal.kind === "str"
        ? `"${literal.value}"`
        : `${literal.value}`;
    }
    case "ident": {
      return expr.value;
    }
    case "binary": {
      return `${printExpr(expr.lhs, indent)} ${expr.binaryKind} ${printExpr(
        expr.rhs,
        indent
      )}`;
    }
    case "unary": {
      return `${expr.unaryKind}${printExpr(expr.rhs, indent)}`;
    }
    case "call": {
      const args = expr.args.map((arg) => printExpr(arg, indent + 1));
      const shortArgs =
        args.map((s) => s.length).reduce((a, b) => a + b, 0) < 40;
      if (shortArgs) {
        return `${printExpr(expr.lhs, indent)}(${args.join(", ")})`;
      } else {
        // Keep the comma separators when arguments are split across lines.
        const joiner = `,${linebreak(indent + 1)}`;
        return (
          `${printExpr(expr.lhs, indent)}(${linebreak(indent + 1)}` +
          `${args.join(joiner)}` +
          `${linebreak(indent)})`
        );
      }
    }
  }
}

/** Returns a newline followed by the whitespace for the given indent level. */
function linebreak(indent: number): string {
  return `\n${ind(indent)}`;
}

/** Returns the leading whitespace for the given indent level. */
function ind(indent: number): string {
  return " ".repeat(indent * 2);
}

View file

@ -95,7 +95,7 @@
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
"noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ "noFallthroughCasesInSwitch": true /* Enable error reporting for fallthrough cases in switch statements. */,
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */