some stuff

This commit is contained in:
nora 2023-07-23 11:51:59 +02:00
parent ef32e646d6
commit 91b183c002
11 changed files with 4320 additions and 101 deletions

View file

@ -0,0 +1,84 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`should tokenize an emtpy function 1`] = `
[
{
"kind": "kw_function",
"span": {
"end": 8,
"start": 0,
},
},
{
"ident": "hello",
"kind": "identifier",
"span": {
"end": 14,
"start": 9,
},
},
{
"kind": "p_popen",
"span": {
"end": 15,
"start": 14,
},
},
{
"kind": "p_pclose",
"span": {
"end": 16,
"start": 15,
},
},
{
"kind": "p_bopen",
"span": {
"end": 18,
"start": 17,
},
},
{
"kind": "p_bclose",
"span": {
"end": 19,
"start": 18,
},
},
]
`;
exports[`should tokenize hello world 1`] = `
[
{
"ident": "print",
"kind": "identifier",
"span": {
"end": 5,
"start": 0,
},
},
{
"kind": "p_popen",
"span": {
"end": 6,
"start": 5,
},
},
{
"kind": "lit_string",
"span": {
"end": 19,
"start": 6,
},
"value": "hello world",
},
{
"kind": "p_pclose",
"span": {
"end": 20,
"start": 19,
},
},
]
`;

33
src/ast.ts Normal file
View file

@ -0,0 +1,33 @@
import { Span } from "./error";
// The payload of a top-level item, discriminated on `kind`.
// Currently only function definitions exist.
export type ItemKind = {
  kind: "function",
  node: FunctionDef,
};
// An item payload together with the source span it was parsed from.
export type Item = ItemKind & {
  span: Span,
}
// A function definition: its name, argument list, and a single body expression.
export type FunctionDef = {
  name: string,
  args: FunctionArg[],
  body: Expr,
}
// A named function argument with its source span.
export type FunctionArg = {
  name: string,
  span: Span,
}
// Expression payloads, discriminated on `kind`:
// string literals and identifier references.
export type ExprKind = {
  kind: "lit_string",
  value: string,
} | {
  kind: "ident",
  value: string,
}
// An expression payload together with its source span.
export type Expr = ExprKind & {
  span: Span,
}

12
src/error.test.ts Normal file
View file

@ -0,0 +1,12 @@
import { Span, lines } from "./error";
it("should extract lines correctly", () => {
  // The input ends in two newlines: `lines` should report five spans,
  // including the empty line between the trailing newlines and the empty
  // final line after the last one.
  const input = "AAA\nmeow\n:3\n\n";
  const lineSpans = lines(input);
  // Spans are half-open [start, end) offsets, so slicing recovers each line
  // without its terminating "\n".
  const lineContents = lineSpans.map(({ start, end }) =>
    input.slice(start, end)
  );
  expect(lineContents).toStrictEqual(["AAA", "meow", ":3", "", ""]);
});

74
src/error.ts Normal file
View file

@ -0,0 +1,74 @@
// A half-open range [start, end) of offsets (UTF-16 code units) into the
// source text; `input.slice(start, end)` recovers the spanned text.
export type Span = {
  start: number;
  end: number;
};
/**
 * A compiler diagnostic: an `Error` that also records the source span it
 * refers to, so `renderError` can point at the offending code.
 */
export class CompilerError extends Error {
  constructor(public msg: string, public span: Span) {
    super(msg);
  }
}
/**
 * Runs `f`, rendering any `CompilerError` it throws against `input`.
 * Non-compiler errors are rethrown untouched.
 */
export function withErrorHandler(input: string, f: () => void) {
  try {
    f();
  } catch (e) {
    if (!(e instanceof CompilerError)) {
      throw e;
    }
    renderError(input, e);
  }
}
/**
 * Pretty-prints `e` to stderr: the error message, the offending source line,
 * and a caret pointing at the error position within that line.
 *
 * A span starting at Number.MAX_SAFE_INTEGER is the end-of-file sentinel
 * (see `nextT` in the parser); it is rendered against the last line with the
 * caret at column 0.
 */
function renderError(input: string, e: CompilerError) {
  const lineSpans = lines(input);
  const line =
    e.span.start === Number.MAX_SAFE_INTEGER
      ? lineSpans[lineSpans.length - 1]
      : lineSpans.find(
          (line) => line.start <= e.span.start && line.end >= e.span.start
        );
  if (!line) {
    throw Error(`Span out of bounds: ${e.span.start}..${e.span.end}`);
  }
  const lineIdx = lineSpans.indexOf(line);
  console.error(`error: ${e.message}`);
  console.error(`${lineIdx} | ${spanToSnippet(input, line)}`);
  const startRelLine =
    e.span.start === Number.MAX_SAFE_INTEGER ? 0 : e.span.start - line.start;
  // The snippet line starts with the `${lineIdx} | ` prefix (digit width
  // plus three characters), so the caret line must pad with the same-width
  // prefix before the in-line offset. The previous version padded only a
  // single space, leaving the caret two columns short of the error position.
  console.error(
    `${" ".repeat(String(lineIdx).length)} | ${" ".repeat(startRelLine)}^`
  );
}
// Returns the source text covered by `span`. The end-of-file sentinel span
// (start === Number.MAX_SAFE_INTEGER) maps to the empty string.
function spanToSnippet(input: string, span: Span): string {
  return span.start === Number.MAX_SAFE_INTEGER
    ? ""
    : input.slice(span.start, span.end);
}
/**
 * Splits `input` into one half-open span per line, excluding the "\n"
 * terminators. There is always at least one span, and a trailing newline
 * produces a final empty span — e.g. "a\n" yields [{0,1}, {2,2}].
 */
export function lines(input: string): Span[] {
  const spans: Span[] = [];
  let lineStart = 0;
  // Scan one position past the end so the final (possibly empty) line is
  // emitted without a special case.
  for (let pos = 0; pos <= input.length; pos++) {
    if (pos === input.length || input[pos] === "\n") {
      spans.push({ start: lineStart, end: pos });
      lineStart = pos + 1;
    }
  }
  return spans;
}
// Signals an unimplemented code path as a CompilerError located at `span`.
export function todo(msg: string, span: Span): never {
  const message = `TODO: ${msg}`;
  throw new CompilerError(message, span);
}

View file

@ -1,102 +1,19 @@
import { withErrorHandler } from "./error";
import { tokenize } from "./lexer";
import { parse } from "./parser";
const input = `
function ok() {}
function hello() {}
`;
function main() {
const tokens = tokenize(input);
console.log(tokens);
}
withErrorHandler(input, () => {
const tokens = tokenize(input);
console.log(tokens);
type Span = {
start: number;
len: number;
};
type DatalessToken =
| "kw_function"
| "p_popen"
| "p_pclose"
| "p_bopen"
| "p_bclose";
type TokenKind =
| { kind: DatalessToken }
| { kind: "identifier"; ident: string };
type Token = TokenKind & {
span: Span;
};
function tokenize(input: string): Token[] {
const tokens: Token[] = [];
let i = 0;
finish: while (i < input.length) {
const next = input[i];
const span: Span = { start: i, len: 1 };
switch (next) {
case undefined: {
break finish;
}
case "(": {
tokens.push({ kind: "p_popen", span });
break;
}
case ")": {
tokens.push({ kind: "p_pclose", span });
break;
}
case "{": {
tokens.push({ kind: "p_bopen", span });
break;
}
case "}": {
tokens.push({ kind: "p_bclose", span });
break;
}
default: {
if (isDigit(next)) {
throw new Error("digit");
} else if (isIdentStart(next)) {
while (isIdentContinue(input[i + 1])) {
span.len++;
i++;
}
const ident = input.slice(span.start, span.start + span.len);
tokens.push({ kind: "identifier", span, ident: ident });
} else if (isWhitespace(next)) {
// ignore
}
}
}
i++;
}
return tokens;
}
function isIdentStart(char: string): boolean {
return (
(char <= "Z" && char >= "A") || (char <= "z" && char >= "a") || char === "_"
);
}
function isIdentContinue(char: string): boolean {
return (
(char <= "Z" && char >= "A") ||
(char <= "z" && char >= "a") ||
char === "_" ||
isDigit(char)
);
}
function isDigit(char: string): boolean {
return !Number.isNaN(parseInt(char, 10));
}
function isWhitespace(char: string): boolean {
return char === " " || char === "\t" || char === "\n" || char === "\r";
const ast = parse(tokens);
console.log(ast);
});
}
main();

17
src/lexer.test.ts Normal file
View file

@ -0,0 +1,17 @@
import { tokenize } from "./lexer";
// NOTE(review): "emtpy" is a typo, but the test name doubles as the snapshot
// key in __snapshots__ — rename the test and regenerate the snapshot together.
it('should tokenize an emtpy function', () => {
  const input = `function hello() {}`;
  const tokens = tokenize(input);
  expect(tokens).toMatchSnapshot();
});
it('should tokenize hello world', () => {
  const input = `print("hello world")`;
  const tokens = tokenize(input);
  expect(tokens).toMatchSnapshot();
});

126
src/lexer.ts Normal file
View file

@ -0,0 +1,126 @@
import { CompilerError, Span } from "./error";
// Token kinds that carry no payload beyond their span:
// keywords (kw_*) and punctuation (p_*).
export type DatalessToken =
  | "kw_function"
  | "kw_let"
  | "p_popen"
  | "p_pclose"
  | "p_bopen"
  | "p_bclose"
  | "p_semi";
// Discriminated union over `kind`: payload-free tokens, identifiers
// (carrying their spelling), and string literals (carrying their value
// without the surrounding quotes).
export type TokenKind =
  | { kind: DatalessToken }
  | { kind: "identifier"; ident: string }
  | { kind: "lit_string"; value: string };
// A token kind plus the source span it was lexed from.
export type Token = TokenKind & {
  span: Span;
};
/**
 * Splits `input` into a flat token stream.
 *
 * Recognizes single-character punctuation, `"`-delimited string literals,
 * keywords, and identifiers; whitespace is skipped. Any other character
 * raises a `CompilerError` pointing at the offending position. Number
 * literals are not implemented yet.
 */
export function tokenize(input: string): Token[] {
  const tokens: Token[] = [];
  let i = 0;
  // `next` can never be undefined while i < input.length, so no extra
  // end-of-input case is needed inside the loop.
  while (i < input.length) {
    const next = input[i];
    // Start with a one-character span; multi-character tokens extend `end`.
    const span: Span = { start: i, end: i + 1 };
    switch (next) {
      case "(": {
        tokens.push({ kind: "p_popen", span });
        break;
      }
      case ")": {
        tokens.push({ kind: "p_pclose", span });
        break;
      }
      case "{": {
        tokens.push({ kind: "p_bopen", span });
        break;
      }
      case "}": {
        tokens.push({ kind: "p_bclose", span });
        break;
      }
      case ";": {
        tokens.push({ kind: "p_semi", span });
        break;
      }
      case '"': {
        // Consume up to and including the closing quote.
        while (true) {
          const ch = input[i + 1];
          span.end++;
          i++;
          if (ch === '"') {
            break;
          }
          if (ch === undefined) {
            throw new CompilerError(`Unterminated string literal`, span);
          }
        }
        // Strip the surrounding quotes from the stored value.
        const value = input.slice(span.start + 1, span.end - 1);
        tokens.push({ kind: "lit_string", span, value });
        break;
      }
      default: {
        if (isDigit(next)) {
          // Number literals are unimplemented; report a proper CompilerError
          // (with a span) like every other lexer error, instead of a bare
          // `Error` that escapes withErrorHandler's rendering.
          throw new CompilerError(
            "TODO: number literals are not supported",
            span
          );
        } else if (isIdentStart(next)) {
          // Greedily extend the span over identifier-continue characters.
          while (isIdentContinue(input[i + 1])) {
            span.end++;
            i++;
          }
          const ident = input.slice(span.start, span.end);
          const kw = isKeyword(ident);
          if (kw) {
            tokens.push({ kind: kw, span });
          } else {
            tokens.push({ kind: "identifier", span, ident });
          }
        } else if (isWhitespace(next)) {
          // ignore
        } else {
          throw new CompilerError(`Invalid character: \`${next}\``, span);
        }
      }
    }
    i++;
  }
  return tokens;
}

// True for characters that may begin an identifier: ASCII letters and "_".
function isIdentStart(char: string): boolean {
  return (
    (char >= "A" && char <= "Z") || (char >= "a" && char <= "z") || char === "_"
  );
}

// True for characters that may continue an identifier:
// identifier-start characters plus decimal digits.
function isIdentContinue(char: string): boolean {
  return isIdentStart(char) || isDigit(char);
}

// True for ASCII decimal digits.
function isDigit(char: string): boolean {
  return char >= "0" && char <= "9";
}

// True for the whitespace characters the lexer skips.
function isWhitespace(char: string): boolean {
  return char === " " || char === "\t" || char === "\n" || char === "\r";
}

// Keyword spellings mapped to their token kinds.
const keywords = new Map<string, DatalessToken>([
  ["function", "kw_function"],
  ["let", "kw_let"],
]);

// Returns the keyword token kind for `kw`, or undefined
// if it is a plain identifier.
function isKeyword(kw: string): DatalessToken | undefined {
  return keywords.get(kw);
}

45
src/parser.ts Normal file
View file

@ -0,0 +1,45 @@
import { FunctionDef, Item } from "./ast";
import { CompilerError, todo } from "./error";
import { Token } from "./lexer";
/**
 * Parses the token stream into a list of top-level items, consuming tokens
 * item by item until none remain.
 */
export function parse(t: Token[]): Item[] {
  const items: Item[] = [];
  let remaining = t;
  while (remaining.length > 0) {
    const [rest, item] = parseItem(remaining);
    remaining = rest;
    items.push(item);
  }
  return items;
}
// Parses one top-level item from the front of `t`, returning the remaining
// tokens and the parsed item. Anything other than a `function` item is an
// "unexpected token" error.
//
// NOTE(review): `todo(...)` always throws, so the function-item branch can
// never actually return yet — name/args/body parsing is still unimplemented.
function parseItem(t: Token[]): [Token[], Item] {
  let next;
  [t, next] = nextT(t);
  if (next.kind === "kw_function") {
    const def: FunctionDef = {
      name: "",
      args: [],
      body: todo("todo", next.span)
    }
    return [t, {kind: "function", node: def, span: next.span}]
  } else {
    // `unexpectedToken` returns `never`, so no return is needed here.
    unexpectedToken(next);
  }
}
// Pops the first token off `t`, returning [rest, token]. When no tokens
// remain, throws a CompilerError carrying the end-of-file sentinel span
// (Number.MAX_SAFE_INTEGER), which renderError treats specially.
function nextT(t: Token[]): [Token[], Token] {
  const [head, ...rest] = t;
  if (!head) {
    throw new CompilerError("unexpected end of file", {start: Number.MAX_SAFE_INTEGER, end: Number.MAX_SAFE_INTEGER})
  }
  return [rest, head];
}
// Reports a token that does not fit the grammar, pointing at its span.
function unexpectedToken(token: Token): never {
  throw new CompilerError("unexpected token", token.span);
}