machineuser
Sync widgets demo
ca548dc
raw
history blame
15.8 kB
import type { Token, TokenType } from "./lexer";
import { TOKEN_TYPES } from "./lexer";
import type { Statement } from "./ast";
import {
Program,
If,
For,
SetStatement,
MemberExpression,
CallExpression,
Identifier,
NumericLiteral,
StringLiteral,
BooleanLiteral,
ArrayLiteral,
ObjectLiteral,
BinaryExpression,
FilterExpression,
TestExpression,
UnaryExpression,
SliceExpression,
KeywordArgumentExpression,
TupleLiteral,
} from "./ast";
/**
* Generate the Abstract Syntax Tree (AST) from a list of tokens.
* Operator precedence can be found here: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_precedence#table
*/
export function parse(tokens: Token[]): Program {
const program = new Program([]);
let current = 0;
/**
* Consume the next token if it matches the expected type, otherwise throw an error.
* @param type The expected token type
* @param error The error message to throw if the token does not match the expected type
* @returns The consumed token
*/
function expect(type: string, error: string): Token {
const prev = tokens[current++];
if (!prev || prev.type !== type) {
throw new Error(`Parser Error: ${error}. ${prev.type} !== ${type}.`);
}
return prev;
}
function parseAny(): Statement {
switch (tokens[current].type) {
case TOKEN_TYPES.Text:
return parseText();
case TOKEN_TYPES.OpenStatement:
return parseJinjaStatement();
case TOKEN_TYPES.OpenExpression:
return parseJinjaExpression();
default:
throw new SyntaxError(`Unexpected token type: ${tokens[current].type}`);
}
}
function not(...types: TokenType[]): boolean {
return current + types.length <= tokens.length && types.some((type, i) => type !== tokens[current + i].type);
}
function is(...types: TokenType[]): boolean {
return current + types.length <= tokens.length && types.every((type, i) => type === tokens[current + i].type);
}
function parseText(): StringLiteral {
return new StringLiteral(expect(TOKEN_TYPES.Text, "Expected text token").value);
}
function parseJinjaStatement(): Statement {
// Consume {% %} tokens
expect(TOKEN_TYPES.OpenStatement, "Expected opening statement token");
let result;
switch (tokens[current].type) {
case TOKEN_TYPES.Set:
++current;
result = parseSetStatement();
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token");
break;
case TOKEN_TYPES.If:
++current;
result = parseIfStatement();
expect(TOKEN_TYPES.OpenStatement, "Expected {% token");
expect(TOKEN_TYPES.EndIf, "Expected endif token");
expect(TOKEN_TYPES.CloseStatement, "Expected %} token");
break;
case TOKEN_TYPES.For:
++current;
result = parseForStatement();
expect(TOKEN_TYPES.OpenStatement, "Expected {% token");
expect(TOKEN_TYPES.EndFor, "Expected endfor token");
expect(TOKEN_TYPES.CloseStatement, "Expected %} token");
break;
default:
throw new SyntaxError(`Unknown statement type: ${tokens[current].type}`);
}
return result;
}
function parseJinjaExpression(): Statement {
// Consume {{ }} tokens
expect(TOKEN_TYPES.OpenExpression, "Expected opening expression token");
const result = parseExpression();
expect(TOKEN_TYPES.CloseExpression, "Expected closing expression token");
return result;
}
// NOTE: `set` acts as both declaration statement and assignment expression
function parseSetStatement(): Statement {
const left = parseExpression();
if (is(TOKEN_TYPES.Equals)) {
++current;
const value = parseSetStatement();
return new SetStatement(left, value);
}
return left;
}
function parseIfStatement(): If {
const test = parseExpression();
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token");
const body: Statement[] = [];
const alternate: Statement[] = [];
// Keep parsing if body until we reach the first {% elif %} or {% else %} or {% endif %}
while (
!(
tokens[current]?.type === TOKEN_TYPES.OpenStatement &&
(tokens[current + 1]?.type === TOKEN_TYPES.ElseIf ||
tokens[current + 1]?.type === TOKEN_TYPES.Else ||
tokens[current + 1]?.type === TOKEN_TYPES.EndIf)
)
) {
body.push(parseAny());
}
// Alternate branch: Check for {% elif %} or {% else %}
if (
tokens[current]?.type === TOKEN_TYPES.OpenStatement &&
tokens[current + 1]?.type !== TOKEN_TYPES.EndIf // There is some body
) {
++current; // eat {% token
if (is(TOKEN_TYPES.ElseIf)) {
expect(TOKEN_TYPES.ElseIf, "Expected elseif token");
alternate.push(parseIfStatement());
} else {
// tokens[current]?.type === TokenType.Else
expect(TOKEN_TYPES.Else, "Expected else token");
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token");
// keep going until we hit {% endif %}
while (
!(tokens[current]?.type === TOKEN_TYPES.OpenStatement && tokens[current + 1]?.type === TOKEN_TYPES.EndIf)
) {
alternate.push(parseAny());
}
}
}
return new If(test, body, alternate);
}
function parseExpressionSequence(primary = false): Statement {
const fn = primary ? parsePrimaryExpression : parseExpression;
const expressions = [fn()];
const isTuple = is(TOKEN_TYPES.Comma);
while (isTuple) {
++current; // consume comma
expressions.push(fn());
if (!is(TOKEN_TYPES.Comma)) {
break;
}
}
return isTuple ? new TupleLiteral(expressions) : expressions[0];
}
function parseForStatement(): For {
// e.g., `message` in `for message in messages`
const loopVariable = parseExpressionSequence(true); // should be an identifier
if (!(loopVariable instanceof Identifier || loopVariable instanceof TupleLiteral)) {
throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${loopVariable.type} instead`);
}
expect(TOKEN_TYPES.In, "Expected `in` keyword following loop variable");
// `messages` in `for message in messages`
const iterable = parseExpression();
expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token");
// Body of for loop
const body: Statement[] = [];
// Keep going until we hit {% endfor
while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor)) {
body.push(parseAny());
}
return new For(loopVariable, iterable, body);
}
function parseExpression(): Statement {
// Choose parse function with lowest precedence
return parseTernaryExpression();
}
function parseTernaryExpression(): Statement {
const a = parseLogicalOrExpression();
if (is(TOKEN_TYPES.If)) {
// Ternary expression
++current; // consume if
const predicate = parseLogicalOrExpression();
expect(TOKEN_TYPES.Else, "Expected else token");
const b = parseLogicalOrExpression();
return new If(predicate, [a], [b]);
}
return a;
}
function parseLogicalOrExpression(): Statement {
let left = parseLogicalAndExpression();
while (is(TOKEN_TYPES.Or)) {
const operator = tokens[current];
++current;
const right = parseLogicalAndExpression();
left = new BinaryExpression(operator, left, right);
}
return left;
}
function parseLogicalAndExpression(): Statement {
let left = parseLogicalNegationExpression();
while (is(TOKEN_TYPES.And)) {
const operator = tokens[current];
++current;
const right = parseLogicalNegationExpression();
left = new BinaryExpression(operator, left, right);
}
return left;
}
function parseLogicalNegationExpression(): Statement {
let right: UnaryExpression | undefined;
// Try parse unary operators
while (is(TOKEN_TYPES.Not)) {
// not not ...
const operator = tokens[current];
++current;
const arg = parseLogicalNegationExpression(); // not test.x === not (test.x)
right = new UnaryExpression(operator, arg);
}
return right ?? parseComparisonExpression();
}
function parseComparisonExpression(): Statement {
// NOTE: membership has same precedence as comparison
// e.g., ('a' in 'apple' == 'b' in 'banana') evaluates as ('a' in ('apple' == ('b' in 'banana')))
let left = parseAdditiveExpression();
while (is(TOKEN_TYPES.ComparisonBinaryOperator) || is(TOKEN_TYPES.In) || is(TOKEN_TYPES.NotIn)) {
const operator = tokens[current];
++current;
const right = parseAdditiveExpression();
left = new BinaryExpression(operator, left, right);
}
return left;
}
function parseAdditiveExpression(): Statement {
let left = parseMultiplicativeExpression();
while (is(TOKEN_TYPES.AdditiveBinaryOperator)) {
const operator = tokens[current];
++current;
const right = parseMultiplicativeExpression();
left = new BinaryExpression(operator, left, right);
}
return left;
}
function parseCallMemberExpression(): Statement {
// Handle member expressions recursively
const member = parseMemberExpression(); // foo.x
if (is(TOKEN_TYPES.OpenParen)) {
// foo.x()
return parseCallExpression(member);
}
return member;
}
function parseCallExpression(callee: Statement): CallExpression {
let callExpression = new CallExpression(callee, parseArgs());
if (is(TOKEN_TYPES.OpenParen)) {
// foo.x()()
callExpression = parseCallExpression(callExpression);
}
return callExpression;
}
function parseArgs(): Statement[] {
// add (x + 5, foo())
expect(TOKEN_TYPES.OpenParen, "Expected opening parenthesis for arguments list");
const args = parseArgumentsList();
expect(TOKEN_TYPES.CloseParen, "Expected closing parenthesis for arguments list");
return args;
}
function parseArgumentsList(): Statement[] {
// comma-separated arguments list
const args = [];
while (!is(TOKEN_TYPES.CloseParen)) {
let argument = parseExpression();
if (is(TOKEN_TYPES.Equals)) {
// keyword argument
// e.g., func(x = 5, y = a or b)
++current; // consume equals
if (!(argument instanceof Identifier)) {
throw new SyntaxError(`Expected identifier for keyword argument`);
}
const value = parseExpression();
argument = new KeywordArgumentExpression(argument, value);
}
args.push(argument);
if (is(TOKEN_TYPES.Comma)) {
++current; // consume comma
}
}
return args;
}
function parseMemberExpressionArgumentsList(): Statement {
// NOTE: This also handles slice expressions colon-separated arguments list
// e.g., ['test'], [0], [:2], [1:], [1:2], [1:2:3]
const slices: (Statement | undefined)[] = [];
let isSlice = false;
while (!is(TOKEN_TYPES.CloseSquareBracket)) {
if (is(TOKEN_TYPES.Colon)) {
// A case where a default is used
// e.g., [:2] will be parsed as [undefined, 2]
slices.push(undefined);
++current; // consume colon
isSlice = true;
} else {
slices.push(parseExpression());
if (is(TOKEN_TYPES.Colon)) {
++current; // consume colon after expression, if it exists
isSlice = true;
}
}
}
if (slices.length === 0) {
// []
throw new SyntaxError(`Expected at least one argument for member/slice expression`);
}
if (isSlice) {
if (slices.length > 3) {
throw new SyntaxError(`Expected 0-3 arguments for slice expression`);
}
return new SliceExpression(...slices);
}
return slices[0] as Statement; // normal member expression
}
function parseMemberExpression(): Statement {
let object = parsePrimaryExpression();
while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) {
const operator = tokens[current]; // . or [
++current;
let property: Statement;
const computed = operator.type !== TOKEN_TYPES.Dot;
if (computed) {
// computed (i.e., bracket notation: obj[expr])
property = parseMemberExpressionArgumentsList();
expect(TOKEN_TYPES.CloseSquareBracket, "Expected closing square bracket");
} else {
// non-computed (i.e., dot notation: obj.expr)
property = parsePrimaryExpression(); // should be an identifier
if (property.type !== "Identifier") {
throw new SyntaxError(`Expected identifier following dot operator`);
}
}
object = new MemberExpression(object, property, computed);
}
return object;
}
function parseMultiplicativeExpression(): Statement {
let left = parseTestExpression();
// Multiplicative operators have higher precedence than test expressions
// e.g., (4 * 4 is divisibleby(2)) evaluates as (4 * (4 is divisibleby(2)))
while (is(TOKEN_TYPES.MultiplicativeBinaryOperator)) {
const operator = tokens[current];
++current;
const right = parseTestExpression();
left = new BinaryExpression(operator, left, right);
}
return left;
}
function parseTestExpression(): Statement {
let operand = parseFilterExpression();
while (is(TOKEN_TYPES.Is)) {
// Support chaining tests
++current; // consume is
const negate = is(TOKEN_TYPES.Not);
if (negate) {
++current; // consume not
}
let filter = parsePrimaryExpression();
if (filter instanceof BooleanLiteral) {
// Special case: treat boolean literals as identifiers
filter = new Identifier(filter.value.toString());
}
if (!(filter instanceof Identifier)) {
throw new SyntaxError(`Expected identifier for the test`);
}
// TODO: Add support for non-identifier tests
operand = new TestExpression(operand, negate, filter);
}
return operand;
}
function parseFilterExpression(): Statement {
let operand = parseCallMemberExpression();
while (is(TOKEN_TYPES.Pipe)) {
// Support chaining filters
++current; // consume pipe
let filter = parsePrimaryExpression(); // should be an identifier
if (!(filter instanceof Identifier)) {
throw new SyntaxError(`Expected identifier for the filter`);
}
if (is(TOKEN_TYPES.OpenParen)) {
filter = parseCallExpression(filter);
}
operand = new FilterExpression(operand, filter as Identifier | CallExpression);
}
return operand;
}
function parsePrimaryExpression(): Statement {
// Primary expression: number, string, identifier, function call, parenthesized expression
const token = tokens[current];
switch (token.type) {
case TOKEN_TYPES.NumericLiteral:
++current;
return new NumericLiteral(Number(token.value));
case TOKEN_TYPES.StringLiteral:
++current;
return new StringLiteral(token.value);
case TOKEN_TYPES.BooleanLiteral:
++current;
return new BooleanLiteral(token.value === "true");
case TOKEN_TYPES.Identifier:
++current;
return new Identifier(token.value);
case TOKEN_TYPES.OpenParen: {
++current; // consume opening parenthesis
const expression = parseExpressionSequence();
if (tokens[current].type !== TOKEN_TYPES.CloseParen) {
throw new SyntaxError(`Expected closing parenthesis, got ${tokens[current].type} instead`);
}
++current; // consume closing parenthesis
return expression;
}
case TOKEN_TYPES.OpenSquareBracket: {
++current; // consume opening square bracket
const values = [];
while (!is(TOKEN_TYPES.CloseSquareBracket)) {
values.push(parseExpression());
if (is(TOKEN_TYPES.Comma)) {
++current; // consume comma
}
}
++current; // consume closing square bracket
return new ArrayLiteral(values);
}
case TOKEN_TYPES.OpenCurlyBracket: {
++current; // consume opening curly bracket
const values = new Map();
while (!is(TOKEN_TYPES.CloseCurlyBracket)) {
const key = parseExpression();
expect(TOKEN_TYPES.Colon, "Expected colon between key and value in object literal");
const value = parseExpression();
values.set(key, value);
if (is(TOKEN_TYPES.Comma)) {
++current; // consume comma
}
}
++current; // consume closing curly bracket
return new ObjectLiteral(values);
}
default:
throw new SyntaxError(`Unexpected token: ${token.type}`);
}
}
while (current < tokens.length) {
program.body.push(parseAny());
}
return program;
}