This commit is contained in:
nora 2023-07-23 14:02:53 +02:00
parent 91b183c002
commit 4e95bc05a3
9 changed files with 640 additions and 132 deletions

View file

@ -1,23 +1,62 @@
import { CompilerError, Span } from "./error";
/**
 * Kinds of tokens that carry no payload beyond their `kind` tag:
 * keywords and punctuation/operator symbols.
 */
// NOTE(review): this span appears to interleave two revisions of the union
// (the `kw_*`/`p_*` names and the literal spellings), and `"!"` is listed
// twice — confirm which members are intended.
export type DatalessToken =
| "kw_function"
| "kw_let"
| "p_popen"
| "p_pclose"
| "p_bopen"
| "p_bclose"
| "p_semi";
| "function"
| "let"
| "in"
| "("
| ")"
| ";"
| ","
| "="
| "+"
| "-"
| "*"
| "/"
| "&"
| "|"
| "!"
| "<"
| ">"
| "=="
| "<="
| ">="
| "!="
| "!";
/**
 * Union of token shapes discriminated by `kind`: dataless kinds,
 * identifiers carrying `ident`, and string literals carrying `value`.
 */
// NOTE(review): `TokenKind` is declared again further down in terms of
// `TokenIdent` and `TokenLit` — confirm which declaration is current.
export type TokenKind =
| { kind: DatalessToken }
| { kind: "identifier"; ident: string }
| { kind: "lit_string"; value: string };
/** Token shape for an identifier; `ident` holds the identifier's text. */
export type TokenIdent = {
  kind: "identifier";
  ident: string;
};
/**
 * Token shapes for literals. A string literal carries its `value` as a
 * string; an integer literal carries its `value` as a number.
 */
export type TokenLit =
  | { kind: "lit_string"; value: string }
  | { kind: "lit_int"; value: number };
/**
 * Every token shape, without source-location information: a dataless
 * kind, an identifier, or a literal.
 */
export type TokenKind =
  | { kind: DatalessToken }
  | TokenIdent
  | TokenLit;
/** A token shape together with the `span` attached to it by the tokenizer. */
export type Token = TokenKind & { span: Span };
/** Only the `kind` discriminant of a token, with no payload and no span. */
export type BaseToken = {
  kind: Token["kind"];
};
/**
 * Characters that each form a complete token on their own. The tokenizer
 * emits a matching character directly as its token kind.
 *
 * Written as one string and split into single-character entries.
 */
const SINGLE_PUNCT: string[] = "();,+-*/&|".split("");
export function tokenize(input: string): Token[] {
const tokens: Token[] = [];
let i = 0;
@ -25,65 +64,102 @@ export function tokenize(input: string): Token[] {
finish: while (i < input.length) {
const next = input[i];
const span: Span = { start: i, end: i + 1 };
switch (next) {
case undefined: {
break finish;
}
case "(": {
tokens.push({ kind: "p_popen", span });
break;
}
case ")": {
tokens.push({ kind: "p_pclose", span });
break;
}
case "{": {
tokens.push({ kind: "p_bopen", span });
break;
}
case "}": {
tokens.push({ kind: "p_bclose", span });
break;
}
case ";": {
tokens.push({ kind: "p_semi", span });
break;
}
case '"': {
while (true) {
const next = input[i + 1];
span.end++;
i++;
if (next === '"') {
break;
}
if (next === undefined) {
throw new CompilerError(`Unterminated string literal`, span);
}
if (SINGLE_PUNCT.includes(next)) {
tokens.push({ kind: next as DatalessToken, span });
} else {
switch (next) {
case undefined: {
break finish;
}
const value = input.slice(span.start + 1, span.end - 1);
tokens.push({ kind: "lit_string", span, value });
break;
}
default: {
if (isDigit(next)) {
throw new Error("digit");
} else if (isIdentStart(next)) {
while (isIdentContinue(input[i + 1])) {
case "=": {
if (input[i + 1] === "=") {
span.end++;
i++;
}
const ident = input.slice(span.start, span.end);
let kw = isKeyword(ident);
if (kw) {
tokens.push({ kind: kw, span });
tokens.push({ kind: "==", span });
} else {
tokens.push({ kind: "identifier", span, ident: ident });
tokens.push({ kind: "=", span });
}
break;
}
case ">": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: ">=", span });
} else {
tokens.push({ kind: ">", span });
}
break;
}
case "<": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: "<=", span });
} else {
tokens.push({ kind: "<", span });
}
break;
}
case "!": {
if (input[i + 1] === "=") {
span.end++;
i++;
tokens.push({ kind: "!=", span });
} else {
tokens.push({ kind: "!", span });
}
break;
}
case '"': {
while (true) {
const next = input[i + 1];
span.end++;
i++;
if (next === '"') {
break;
}
if (next === undefined) {
throw new CompilerError(`Unterminated string literal`, span);
}
}
const value = input.slice(span.start + 1, span.end - 1);
tokens.push({ kind: "lit_string", span, value });
break;
}
default: {
if (isDigit(next)) {
while (isDigit(input[i + 1])) {
span.end++;
i++;
}
const digit = input.slice(span.start, span.end);
const int = parseInt(digit, 10);
if (Number.isNaN(int)) {
throw new Error(
`\`${digit}\` was tokenized to a number even though it is not`
);
}
tokens.push({ kind: "lit_int", value: int, span });
} else if (isIdentStart(next)) {
while (isIdentContinue(input[i + 1])) {
span.end++;
i++;
}
const ident = input.slice(span.start, span.end);
let kw = isKeyword(ident);
if (kw) {
tokens.push({ kind: kw, span });
} else {
tokens.push({ kind: "identifier", span, ident: ident });
}
} else if (isWhitespace(next)) {
// ignore
} else {
throw new CompilerError(`Invalid character: \`${next}\``, span);
}
} else if (isWhitespace(next)) {
// ignore
} else {
throw new CompilerError(`Invalid character: \`${next}\``, span);
}
}
}
@ -117,10 +193,7 @@ function isWhitespace(char: string): boolean {
return char === " " || char === "\t" || char === "\n" || char === "\r";
}
// Reserved words: identifier text found in this collection is emitted by
// the tokenizer as a keyword token instead of an `identifier` token.
// NOTE(review): two declarations of `keywords` appear here (a Map to
// `kw_*` names and a plain Set) — looks like two revisions side by side;
// confirm which one is intended.
const keywords = new Map<string, DatalessToken>([
["function", "kw_function"],
["let", "kw_let"],
]);
const keywords = new Set<string>(["function", "let", "in"]);
/**
 * Looks `kw` up in `keywords`.
 *
 * @param kw - candidate identifier text
 * @returns the keyword's token kind, or `undefined` when `kw` is not a keyword
 */
function isKeyword(kw: string): DatalessToken | undefined {
// NOTE(review): two return statements follow (a Map-style `get` and a
// Set-style `has`); the second is unreachable as written — confirm which
// one matches the current `keywords` declaration.
return keywords.get(kw);
return keywords.has(kw) ? (kw as DatalessToken) : undefined;
}