Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import type { Token, TokenType } from "./lexer"; | |
import { TOKEN_TYPES } from "./lexer"; | |
import type { Statement } from "./ast"; | |
import { | |
Program, | |
If, | |
For, | |
SetStatement, | |
MemberExpression, | |
CallExpression, | |
Identifier, | |
NumericLiteral, | |
StringLiteral, | |
BooleanLiteral, | |
ArrayLiteral, | |
ObjectLiteral, | |
BinaryExpression, | |
FilterExpression, | |
TestExpression, | |
UnaryExpression, | |
SliceExpression, | |
KeywordArgumentExpression, | |
TupleLiteral, | |
} from "./ast"; | |
/** | |
* Generate the Abstract Syntax Tree (AST) from a list of tokens. | |
* Operator precedence can be found here: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_precedence#table | |
*/ | |
export function parse(tokens: Token[]): Program { | |
const program = new Program([]); | |
let current = 0; | |
/** | |
* Consume the next token if it matches the expected type, otherwise throw an error. | |
* @param type The expected token type | |
* @param error The error message to throw if the token does not match the expected type | |
* @returns The consumed token | |
*/ | |
function expect(type: string, error: string): Token { | |
const prev = tokens[current++]; | |
if (!prev || prev.type !== type) { | |
throw new Error(`Parser Error: ${error}. ${prev.type} !== ${type}.`); | |
} | |
return prev; | |
} | |
function parseAny(): Statement { | |
switch (tokens[current].type) { | |
case TOKEN_TYPES.Text: | |
return parseText(); | |
case TOKEN_TYPES.OpenStatement: | |
return parseJinjaStatement(); | |
case TOKEN_TYPES.OpenExpression: | |
return parseJinjaExpression(); | |
default: | |
throw new SyntaxError(`Unexpected token type: ${tokens[current].type}`); | |
} | |
} | |
function not(...types: TokenType[]): boolean { | |
return current + types.length <= tokens.length && types.some((type, i) => type !== tokens[current + i].type); | |
} | |
function is(...types: TokenType[]): boolean { | |
return current + types.length <= tokens.length && types.every((type, i) => type === tokens[current + i].type); | |
} | |
function parseText(): StringLiteral { | |
return new StringLiteral(expect(TOKEN_TYPES.Text, "Expected text token").value); | |
} | |
function parseJinjaStatement(): Statement { | |
// Consume {% %} tokens | |
expect(TOKEN_TYPES.OpenStatement, "Expected opening statement token"); | |
let result; | |
switch (tokens[current].type) { | |
case TOKEN_TYPES.Set: | |
++current; | |
result = parseSetStatement(); | |
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); | |
break; | |
case TOKEN_TYPES.If: | |
++current; | |
result = parseIfStatement(); | |
expect(TOKEN_TYPES.OpenStatement, "Expected {% token"); | |
expect(TOKEN_TYPES.EndIf, "Expected endif token"); | |
expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); | |
break; | |
case TOKEN_TYPES.For: | |
++current; | |
result = parseForStatement(); | |
expect(TOKEN_TYPES.OpenStatement, "Expected {% token"); | |
expect(TOKEN_TYPES.EndFor, "Expected endfor token"); | |
expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); | |
break; | |
default: | |
throw new SyntaxError(`Unknown statement type: ${tokens[current].type}`); | |
} | |
return result; | |
} | |
function parseJinjaExpression(): Statement { | |
// Consume {{ }} tokens | |
expect(TOKEN_TYPES.OpenExpression, "Expected opening expression token"); | |
const result = parseExpression(); | |
expect(TOKEN_TYPES.CloseExpression, "Expected closing expression token"); | |
return result; | |
} | |
// NOTE: `set` acts as both declaration statement and assignment expression | |
function parseSetStatement(): Statement { | |
const left = parseExpression(); | |
if (is(TOKEN_TYPES.Equals)) { | |
++current; | |
const value = parseSetStatement(); | |
return new SetStatement(left, value); | |
} | |
return left; | |
} | |
function parseIfStatement(): If { | |
const test = parseExpression(); | |
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); | |
const body: Statement[] = []; | |
const alternate: Statement[] = []; | |
// Keep parsing if body until we reach the first {% elif %} or {% else %} or {% endif %} | |
while ( | |
!( | |
tokens[current]?.type === TOKEN_TYPES.OpenStatement && | |
(tokens[current + 1]?.type === TOKEN_TYPES.ElseIf || | |
tokens[current + 1]?.type === TOKEN_TYPES.Else || | |
tokens[current + 1]?.type === TOKEN_TYPES.EndIf) | |
) | |
) { | |
body.push(parseAny()); | |
} | |
// Alternate branch: Check for {% elif %} or {% else %} | |
if ( | |
tokens[current]?.type === TOKEN_TYPES.OpenStatement && | |
tokens[current + 1]?.type !== TOKEN_TYPES.EndIf // There is some body | |
) { | |
++current; // eat {% token | |
if (is(TOKEN_TYPES.ElseIf)) { | |
expect(TOKEN_TYPES.ElseIf, "Expected elseif token"); | |
alternate.push(parseIfStatement()); | |
} else { | |
// tokens[current]?.type === TokenType.Else | |
expect(TOKEN_TYPES.Else, "Expected else token"); | |
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); | |
// keep going until we hit {% endif %} | |
while ( | |
!(tokens[current]?.type === TOKEN_TYPES.OpenStatement && tokens[current + 1]?.type === TOKEN_TYPES.EndIf) | |
) { | |
alternate.push(parseAny()); | |
} | |
} | |
} | |
return new If(test, body, alternate); | |
} | |
function parseExpressionSequence(primary = false): Statement { | |
const fn = primary ? parsePrimaryExpression : parseExpression; | |
const expressions = [fn()]; | |
const isTuple = is(TOKEN_TYPES.Comma); | |
while (isTuple) { | |
++current; // consume comma | |
expressions.push(fn()); | |
if (!is(TOKEN_TYPES.Comma)) { | |
break; | |
} | |
} | |
return isTuple ? new TupleLiteral(expressions) : expressions[0]; | |
} | |
function parseForStatement(): For { | |
// e.g., `message` in `for message in messages` | |
const loopVariable = parseExpressionSequence(true); // should be an identifier | |
if (!(loopVariable instanceof Identifier || loopVariable instanceof TupleLiteral)) { | |
throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${loopVariable.type} instead`); | |
} | |
expect(TOKEN_TYPES.In, "Expected `in` keyword following loop variable"); | |
// `messages` in `for message in messages` | |
const iterable = parseExpression(); | |
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); | |
// Body of for loop | |
const body: Statement[] = []; | |
// Keep going until we hit {% endfor | |
while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor)) { | |
body.push(parseAny()); | |
} | |
return new For(loopVariable, iterable, body); | |
} | |
function parseExpression(): Statement { | |
// Choose parse function with lowest precedence | |
return parseTernaryExpression(); | |
} | |
function parseTernaryExpression(): Statement { | |
const a = parseLogicalOrExpression(); | |
if (is(TOKEN_TYPES.If)) { | |
// Ternary expression | |
++current; // consume if | |
const predicate = parseLogicalOrExpression(); | |
expect(TOKEN_TYPES.Else, "Expected else token"); | |
const b = parseLogicalOrExpression(); | |
return new If(predicate, [a], [b]); | |
} | |
return a; | |
} | |
function parseLogicalOrExpression(): Statement { | |
let left = parseLogicalAndExpression(); | |
while (is(TOKEN_TYPES.Or)) { | |
const operator = tokens[current]; | |
++current; | |
const right = parseLogicalAndExpression(); | |
left = new BinaryExpression(operator, left, right); | |
} | |
return left; | |
} | |
function parseLogicalAndExpression(): Statement { | |
let left = parseLogicalNegationExpression(); | |
while (is(TOKEN_TYPES.And)) { | |
const operator = tokens[current]; | |
++current; | |
const right = parseLogicalNegationExpression(); | |
left = new BinaryExpression(operator, left, right); | |
} | |
return left; | |
} | |
function parseLogicalNegationExpression(): Statement { | |
let right: UnaryExpression | undefined; | |
// Try parse unary operators | |
while (is(TOKEN_TYPES.Not)) { | |
// not not ... | |
const operator = tokens[current]; | |
++current; | |
const arg = parseLogicalNegationExpression(); // not test.x === not (test.x) | |
right = new UnaryExpression(operator, arg); | |
} | |
return right ?? parseComparisonExpression(); | |
} | |
function parseComparisonExpression(): Statement { | |
// NOTE: membership has same precedence as comparison | |
// e.g., ('a' in 'apple' == 'b' in 'banana') evaluates as ('a' in ('apple' == ('b' in 'banana'))) | |
let left = parseAdditiveExpression(); | |
while (is(TOKEN_TYPES.ComparisonBinaryOperator) || is(TOKEN_TYPES.In) || is(TOKEN_TYPES.NotIn)) { | |
const operator = tokens[current]; | |
++current; | |
const right = parseAdditiveExpression(); | |
left = new BinaryExpression(operator, left, right); | |
} | |
return left; | |
} | |
function parseAdditiveExpression(): Statement { | |
let left = parseMultiplicativeExpression(); | |
while (is(TOKEN_TYPES.AdditiveBinaryOperator)) { | |
const operator = tokens[current]; | |
++current; | |
const right = parseMultiplicativeExpression(); | |
left = new BinaryExpression(operator, left, right); | |
} | |
return left; | |
} | |
function parseCallMemberExpression(): Statement { | |
// Handle member expressions recursively | |
const member = parseMemberExpression(); // foo.x | |
if (is(TOKEN_TYPES.OpenParen)) { | |
// foo.x() | |
return parseCallExpression(member); | |
} | |
return member; | |
} | |
function parseCallExpression(callee: Statement): CallExpression { | |
let callExpression = new CallExpression(callee, parseArgs()); | |
if (is(TOKEN_TYPES.OpenParen)) { | |
// foo.x()() | |
callExpression = parseCallExpression(callExpression); | |
} | |
return callExpression; | |
} | |
function parseArgs(): Statement[] { | |
// add (x + 5, foo()) | |
expect(TOKEN_TYPES.OpenParen, "Expected opening parenthesis for arguments list"); | |
const args = parseArgumentsList(); | |
expect(TOKEN_TYPES.CloseParen, "Expected closing parenthesis for arguments list"); | |
return args; | |
} | |
function parseArgumentsList(): Statement[] { | |
// comma-separated arguments list | |
const args = []; | |
while (!is(TOKEN_TYPES.CloseParen)) { | |
let argument = parseExpression(); | |
if (is(TOKEN_TYPES.Equals)) { | |
// keyword argument | |
// e.g., func(x = 5, y = a or b) | |
++current; // consume equals | |
if (!(argument instanceof Identifier)) { | |
throw new SyntaxError(`Expected identifier for keyword argument`); | |
} | |
const value = parseExpression(); | |
argument = new KeywordArgumentExpression(argument, value); | |
} | |
args.push(argument); | |
if (is(TOKEN_TYPES.Comma)) { | |
++current; // consume comma | |
} | |
} | |
return args; | |
} | |
function parseMemberExpressionArgumentsList(): Statement { | |
// NOTE: This also handles slice expressions colon-separated arguments list | |
// e.g., ['test'], [0], [:2], [1:], [1:2], [1:2:3] | |
const slices: (Statement | undefined)[] = []; | |
let isSlice = false; | |
while (!is(TOKEN_TYPES.CloseSquareBracket)) { | |
if (is(TOKEN_TYPES.Colon)) { | |
// A case where a default is used | |
// e.g., [:2] will be parsed as [undefined, 2] | |
slices.push(undefined); | |
++current; // consume colon | |
isSlice = true; | |
} else { | |
slices.push(parseExpression()); | |
if (is(TOKEN_TYPES.Colon)) { | |
++current; // consume colon after expression, if it exists | |
isSlice = true; | |
} | |
} | |
} | |
if (slices.length === 0) { | |
// [] | |
throw new SyntaxError(`Expected at least one argument for member/slice expression`); | |
} | |
if (isSlice) { | |
if (slices.length > 3) { | |
throw new SyntaxError(`Expected 0-3 arguments for slice expression`); | |
} | |
return new SliceExpression(...slices); | |
} | |
return slices[0] as Statement; // normal member expression | |
} | |
function parseMemberExpression(): Statement { | |
let object = parsePrimaryExpression(); | |
while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) { | |
const operator = tokens[current]; // . or [ | |
++current; | |
let property: Statement; | |
const computed = operator.type !== TOKEN_TYPES.Dot; | |
if (computed) { | |
// computed (i.e., bracket notation: obj[expr]) | |
property = parseMemberExpressionArgumentsList(); | |
expect(TOKEN_TYPES.CloseSquareBracket, "Expected closing square bracket"); | |
} else { | |
// non-computed (i.e., dot notation: obj.expr) | |
property = parsePrimaryExpression(); // should be an identifier | |
if (property.type !== "Identifier") { | |
throw new SyntaxError(`Expected identifier following dot operator`); | |
} | |
} | |
object = new MemberExpression(object, property, computed); | |
} | |
return object; | |
} | |
function parseMultiplicativeExpression(): Statement { | |
let left = parseTestExpression(); | |
// Multiplicative operators have higher precedence than test expressions | |
// e.g., (4 * 4 is divisibleby(2)) evaluates as (4 * (4 is divisibleby(2))) | |
while (is(TOKEN_TYPES.MultiplicativeBinaryOperator)) { | |
const operator = tokens[current]; | |
++current; | |
const right = parseTestExpression(); | |
left = new BinaryExpression(operator, left, right); | |
} | |
return left; | |
} | |
function parseTestExpression(): Statement { | |
let operand = parseFilterExpression(); | |
while (is(TOKEN_TYPES.Is)) { | |
// Support chaining tests | |
++current; // consume is | |
const negate = is(TOKEN_TYPES.Not); | |
if (negate) { | |
++current; // consume not | |
} | |
let filter = parsePrimaryExpression(); | |
if (filter instanceof BooleanLiteral) { | |
// Special case: treat boolean literals as identifiers | |
filter = new Identifier(filter.value.toString()); | |
} | |
if (!(filter instanceof Identifier)) { | |
throw new SyntaxError(`Expected identifier for the test`); | |
} | |
// TODO: Add support for non-identifier tests | |
operand = new TestExpression(operand, negate, filter); | |
} | |
return operand; | |
} | |
function parseFilterExpression(): Statement { | |
let operand = parseCallMemberExpression(); | |
while (is(TOKEN_TYPES.Pipe)) { | |
// Support chaining filters | |
++current; // consume pipe | |
let filter = parsePrimaryExpression(); // should be an identifier | |
if (!(filter instanceof Identifier)) { | |
throw new SyntaxError(`Expected identifier for the filter`); | |
} | |
if (is(TOKEN_TYPES.OpenParen)) { | |
filter = parseCallExpression(filter); | |
} | |
operand = new FilterExpression(operand, filter as Identifier | CallExpression); | |
} | |
return operand; | |
} | |
function parsePrimaryExpression(): Statement { | |
// Primary expression: number, string, identifier, function call, parenthesized expression | |
const token = tokens[current]; | |
switch (token.type) { | |
case TOKEN_TYPES.NumericLiteral: | |
++current; | |
return new NumericLiteral(Number(token.value)); | |
case TOKEN_TYPES.StringLiteral: | |
++current; | |
return new StringLiteral(token.value); | |
case TOKEN_TYPES.BooleanLiteral: | |
++current; | |
return new BooleanLiteral(token.value === "true"); | |
case TOKEN_TYPES.Identifier: | |
++current; | |
return new Identifier(token.value); | |
case TOKEN_TYPES.OpenParen: { | |
++current; // consume opening parenthesis | |
const expression = parseExpressionSequence(); | |
if (tokens[current].type !== TOKEN_TYPES.CloseParen) { | |
throw new SyntaxError(`Expected closing parenthesis, got ${tokens[current].type} instead`); | |
} | |
++current; // consume closing parenthesis | |
return expression; | |
} | |
case TOKEN_TYPES.OpenSquareBracket: { | |
++current; // consume opening square bracket | |
const values = []; | |
while (!is(TOKEN_TYPES.CloseSquareBracket)) { | |
values.push(parseExpression()); | |
if (is(TOKEN_TYPES.Comma)) { | |
++current; // consume comma | |
} | |
} | |
++current; // consume closing square bracket | |
return new ArrayLiteral(values); | |
} | |
case TOKEN_TYPES.OpenCurlyBracket: { | |
++current; // consume opening curly bracket | |
const values = new Map(); | |
while (!is(TOKEN_TYPES.CloseCurlyBracket)) { | |
const key = parseExpression(); | |
expect(TOKEN_TYPES.Colon, "Expected colon between key and value in object literal"); | |
const value = parseExpression(); | |
values.set(key, value); | |
if (is(TOKEN_TYPES.Comma)) { | |
++current; // consume comma | |
} | |
} | |
++current; // consume closing curly bracket | |
return new ObjectLiteral(values); | |
} | |
default: | |
throw new SyntaxError(`Unexpected token: ${token.type}`); | |
} | |
} | |
while (current < tokens.length) { | |
program.body.push(parseAny()); | |
} | |
return program; | |
} | |