/** * See `expr_parser.md` for notes on the implementation of this parser! */ const ExprParser = (function() { // buffer containing a list of tokens and a movable pointer into the list class TokenBuf { constructor(tokens) { this.tokens = tokens; this.index = 0; } get peek() { return this.tokens[this.index]; } consume(termName) { if (this.peek.type !== termName) { throw new Error(`Expected a ${termName}, but got a ${this.peek.type}!`) } const node = { type: 'term', token: this.peek }; ++this.index; return node; } } // tokenize an expression string function tokenize(exprStr) { exprStr = exprStr.trim(); const tokens = []; let col = 0; function pushSymbol(sym) { tokens.push({ type: sym }); col += sym.length; } while (col < exprStr.length) { // parse fixed symbols, like operators and stuff switch (exprStr[col]) { case '(': case ')': case ',': case '&': case '|': case '+': case '-': case '*': case '/': case '^': case '=': pushSymbol(exprStr[col]); continue; case '>': pushSymbol(exprStr[col + 1] === '=' ? '>=' : '>'); continue; case '<': pushSymbol(exprStr[col + 1] === '=' ? '<=' : '<'); continue; case '!': pushSymbol(exprStr[col + 1] === '=' ? '!=' : '!'); continue; case ' ': // ignore extra whitespace ++col; continue; } if (exprStr.slice(col, col + 2) === "?=") { pushSymbol("?="); continue; } // parse a numeric literal let m; if ((m = /^\d+(?:\.\d*)?/.exec(exprStr.substring(col))) !== null) { tokens.push({ type: 'nLit', value: parseFloat(m[0]) }); col += m[0].length; continue; } // parse a string literal if ((m = /^"([^"]+)"/.exec(exprStr.substring(col))) !== null) { // with double-quotes tokens.push({ type: 'sLit', value: m[1] }); col += m[0].length; continue; } if ((m = /^'([^']+)'/.exec(exprStr.substring(col))) !== null) { // with single-quotes tokens.push({ type: 'sLit', value: m[1] }); col += m[0].length; continue; } // parse an identifier or boolean literal if ((m = /^\w[\w\d%]*/.exec(exprStr.substring(col))) !== null) { switch (m[0]) { case 'true': tokens.push({ type: 'bLit', value: true }); col += 4; continue; case 'false': tokens.push({ type: 'bLit', value: false }); col += 5; continue; } tokens.push({ type: 'ident', id: m[0] }); col += m[0].length; continue; } // if we reach here without successfully parsing a token, it's an error throw new Error(`Could not parse character "${exprStr[col]}" at position ${col}`); } tokens.push({ type: 'eof' }); return new TokenBuf(tokens); } // parse an AST from a sequence of tokens function takeExpr(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeConj(tokens)); return { type: 'nonterm', name: 'expr', prod: 0, children }; default: throw new Error('Could not parse expr!'); } } function takeExprList(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeExpr(tokens)); children.push(takeExprList0(tokens)); return { type: 'nonterm', name: 'exprList', prod: 0, children }; default: throw new Error('Could not parse exprList!'); } } function takeExprList0(tokens) { const children = []; switch (tokens.peek.type) { case ')': return { type: 'nonterm', name: 'exprList\'', prod: 1, children }; case ',': children.push(tokens.consume(',')); children.push(takeExpr(tokens)); children.push(takeExprList0(tokens)); return { type: 'nonterm', name: 'exprList\'', prod: 0, children }; case 'eof': return { type: 'nonterm', name: 'exprList\'', prod: 1, children }; default: throw new Error('Could not parse exprList\'!'); } } function takeConj(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeDisj(tokens)); children.push(takeConj0(tokens)); return { type: 'nonterm', name: 'conj', prod: 0, children }; default: throw new Error('Could not parse conj!'); } } function takeConj0(tokens) { const children = []; switch (tokens.peek.type) { case '&': children.push(tokens.consume('&')); children.push(takeDisj(tokens)); children.push(takeConj0(tokens)); return { type: 'nonterm', name: 'conj\'', prod: 0, children }; case ')': case ',': case 'eof': return { type: 'nonterm', name: 'conj\'', prod: 1, children }; default: throw new Error('Could not parse conj\'!'); } } function takeDisj(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeCmpEq(tokens)); children.push(takeDisj0(tokens)); return { type: 'nonterm', name: 'disj', prod: 0, children }; default: throw new Error('Could not parse disj!'); } } function takeDisj0(tokens) { const children = []; switch (tokens.peek.type) { case '&': case ')': case ',': return { type: 'nonterm', name: 'disj\'', prod: 1, children }; case '|': children.push(tokens.consume('|')); children.push(takeCmpEq(tokens)); children.push(takeDisj0(tokens)); return { type: 'nonterm', name: 'disj\'', prod: 0, children }; case 'eof': return { type: 'nonterm', name: 'disj\'', prod: 1, children }; default: throw new Error('Could not parse disj\'!'); } } function takeCmpEq(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeCmpRel(tokens)); children.push(takeCmpEq0(tokens)); return { type: 'nonterm', name: 'cmpEq', prod: 0, children }; default: throw new Error('Could not parse cmpEq!'); } } function takeCmpEq0(tokens) { const children = []; switch (tokens.peek.type) { case '!=': children.push(tokens.consume('!=')); children.push(takeCmpRel(tokens)); children.push(takeCmpEq0(tokens)); return { type: 'nonterm', name: 'cmpEq\'', prod: 1, children }; case '&': case ')': case ',': return { type: 'nonterm', name: 'cmpEq\'', prod: 3, children }; case '=': children.push(tokens.consume('=')); children.push(takeCmpRel(tokens)); children.push(takeCmpEq0(tokens)); return { type: 'nonterm', name: 'cmpEq\'', prod: 0, children }; case '?=': children.push(tokens.consume('?=')); children.push(takeCmpRel(tokens)); children.push(takeCmpEq0(tokens)); return { type: 'nonterm', name: 'cmpEq\'', prod: 2, children }; case '|': case 'eof': return { type: 'nonterm', name: 'cmpEq\'', prod: 3, children }; default: throw new Error('Could not parse cmpEq\'!'); } } function takeCmpRel(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeSum(tokens)); children.push(takeCmpRel0(tokens)); return { type: 'nonterm', name: 'cmpRel', prod: 0, children }; default: throw new Error('Could not parse cmpRel!'); } } function takeCmpRel0(tokens) { const children = []; switch (tokens.peek.type) { case '!=': case '&': case ')': case ',': return { type: 'nonterm', name: 'cmpRel\'', prod: 4, children }; case '<': children.push(tokens.consume('<')); children.push(takeSum(tokens)); children.push(takeCmpRel0(tokens)); return { type: 'nonterm', name: 'cmpRel\'', prod: 1, children }; case '<=': children.push(tokens.consume('<=')); children.push(takeSum(tokens)); children.push(takeCmpRel0(tokens)); return { type: 'nonterm', name: 'cmpRel\'', prod: 0, children }; case '=': return { type: 'nonterm', name: 'cmpRel\'', prod: 4, children }; case '>': children.push(tokens.consume('>')); children.push(takeSum(tokens)); children.push(takeCmpRel0(tokens)); return { type: 'nonterm', name: 'cmpRel\'', prod: 2, children }; case '>=': children.push(tokens.consume('>=')); children.push(takeSum(tokens)); children.push(takeCmpRel0(tokens)); return { type: 'nonterm', name: 'cmpRel\'', prod: 3, children }; case '?=': case '|': case 'eof': return { type: 'nonterm', name: 'cmpRel\'', prod: 4, children }; default: throw new Error('Could not parse cmpRel\'!'); } } function takeSum(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeProd(tokens)); children.push(takeSum0(tokens)); return { type: 'nonterm', name: 'sum', prod: 0, children }; default: throw new Error('Could not parse sum!'); } } function takeSum0(tokens) { const children = []; switch (tokens.peek.type) { case '!=': case '&': case ')': return { type: 'nonterm', name: 'sum\'', prod: 2, children }; case '+': children.push(tokens.consume('+')); children.push(takeProd(tokens)); children.push(takeSum0(tokens)); return { type: 'nonterm', name: 'sum\'', prod: 0, children }; case ',': return { type: 'nonterm', name: 'sum\'', prod: 2, children }; case '-': children.push(tokens.consume('-')); children.push(takeProd(tokens)); children.push(takeSum0(tokens)); return { type: 'nonterm', name: 'sum\'', prod: 1, children }; case '<': case '<=': case '=': case '>': case '>=': case '?=': case '|': case 'eof': return { type: 'nonterm', name: 'sum\'', prod: 2, children }; default: throw new Error('Could not parse sum\'!'); } } function takeProd(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeExp(tokens)); children.push(takeProd0(tokens)); return { type: 'nonterm', name: 'prod', prod: 0, children }; default: throw new Error('Could not parse prod!'); } } function takeProd0(tokens) { const children = []; switch (tokens.peek.type) { case '!=': case '&': case ')': return { type: 'nonterm', name: 'prod\'', prod: 2, children }; case '*': children.push(tokens.consume('*')); children.push(takeExp(tokens)); children.push(takeProd0(tokens)); return { type: 'nonterm', name: 'prod\'', prod: 0, children }; case '+': case ',': case '-': return { type: 'nonterm', name: 'prod\'', prod: 2, children }; case '/': children.push(tokens.consume('/')); children.push(takeExp(tokens)); children.push(takeProd0(tokens)); return { type: 'nonterm', name: 'prod\'', prod: 1, children }; case '<': case '<=': case '=': case '>': case '>=': case '?=': case '|': case 'eof': return { type: 'nonterm', name: 'prod\'', prod: 2, children }; default: throw new Error('Could not parse prod\'!'); } } function takeExp(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeUnary(tokens)); children.push(takeExp0(tokens)); return { type: 'nonterm', name: 'exp', prod: 0, children }; default: throw new Error('Could not parse exp!'); } } function takeExp0(tokens) { const children = []; switch (tokens.peek.type) { case '!=': case '&': case ')': case '*': case '+': case ',': case '-': case '/': case '<': case '<=': case '=': case '>': case '>=': case '?=': return { type: 'nonterm', name: 'exp\'', prod: 1, children }; case '^': children.push(tokens.consume('^')); children.push(takeUnary(tokens)); children.push(takeExp0(tokens)); return { type: 'nonterm', name: 'exp\'', prod: 0, children }; case '|': case 'eof': return { type: 'nonterm', name: 'exp\'', prod: 1, children }; default: throw new Error('Could not parse exp\'!'); } } function takeUnary(tokens) { const children = []; switch (tokens.peek.type) { case '!': children.push(tokens.consume('!')); children.push(takeUnary(tokens)); return { type: 'nonterm', name: 'unary', prod: 1, children }; case '(': children.push(takePrim(tokens)); return { type: 'nonterm', name: 'unary', prod: 2, children }; case '-': children.push(tokens.consume('-')); children.push(takeUnary(tokens)); return { type: 'nonterm', name: 'unary', prod: 0, children }; case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takePrim(tokens)); return { type: 'nonterm', name: 'unary', prod: 2, children }; default: throw new Error('Could not parse unary!'); } } function takePrim(tokens) { const children = []; switch (tokens.peek.type) { case '(': children.push(tokens.consume('(')); children.push(takeExpr(tokens)); children.push(tokens.consume(')')); return { type: 'nonterm', name: 'prim', prod: 4, children }; case 'bLit': children.push(tokens.consume('bLit')); return { type: 'nonterm', name: 'prim', prod: 1, children }; case 'ident': children.push(tokens.consume('ident')); children.push(takeIdentTail(tokens)); return { type: 'nonterm', name: 'prim', prod: 3, children }; case 'nLit': children.push(tokens.consume('nLit')); return { type: 'nonterm', name: 'prim', prod: 0, children }; case 'sLit': children.push(tokens.consume('sLit')); return { type: 'nonterm', name: 'prim', prod: 2, children }; default: throw new Error('Could not parse prim!'); } } function takeIdentTail(tokens) { const children = []; switch (tokens.peek.type) { case '!=': case '&': return { type: 'nonterm', name: 'identTail', prod: 1, children }; case '(': children.push(tokens.consume('(')); children.push(takeArgs(tokens)); children.push(tokens.consume(')')); return { type: 'nonterm', name: 'identTail', prod: 0, children }; case ')': case '*': case '+': case ',': case '-': case '/': case '<': case '<=': case '=': case '>': case '>=': case '?=': case '^': case '|': case 'eof': return { type: 'nonterm', name: 'identTail', prod: 1, children }; default: throw new Error('Could not parse identTail!'); } } function takeArgs(tokens) { const children = []; switch (tokens.peek.type) { case '!': case '(': children.push(takeExprList(tokens)); return { type: 'nonterm', name: 'args', prod: 0, children }; case ')': return { type: 'nonterm', name: 'args', prod: 1, children }; case '-': case 'bLit': case 'ident': case 'nLit': case 'sLit': children.push(takeExprList(tokens)); return { type: 'nonterm', name: 'args', prod: 0, children }; case 'eof': return { type: 'nonterm', name: 'args', prod: 1, children }; default: throw new Error('Could not parse args!'); } } // apply tree transformations to recover the nominal-grammar AST function fixUp(ast) { if (ast.type === 'term') { return ast; } switch (ast.name) { case 'exprList': // recover left-recursive structures case 'conj': case 'disj': case 'exp': return fixUpRightRecurse(ast, 1); case 'sum': case 'prod': return fixUpRightRecurse(ast, 2); case 'cmpEq': return fixUpRightRecurse(ast, 3); case 'cmpRel': return fixUpRightRecurse(ast, 4); case 'prim': // recover left-factored identifier things switch (ast.prod) { case 3: switch (ast.children[1].prod) { case 0: // function call return { type: 'nonterm', name: 'prim', prod: 3, children: [fixUp(ast.children[0]), ...mapFixUp(ast.children[1].children)] }; case 1: // just an identifier return { type: 'nonterm', name: 'prim', prod: 4, children: [fixUp(ast.children[0])] }; } break; case 4: return { ...ast, prod: 5, children: mapFixUp(ast.children) }; } break; } return { ...ast, children: mapFixUp(ast.children) }; } function mapFixUp(nodes) { return nodes.map(chAst => fixUp(chAst)); } function fixUpRightRecurse(ast, maxProd) { let nomAst = { type: 'nonterm', name: ast.name, prod: maxProd, children: [fixUp(ast.children[0])] }; let tree = ast.children[1]; while (tree.prod < maxProd) { nomAst = { type: 'nonterm', name: ast.name, prod: tree.prod, children: [nomAst, ...mapFixUp(tree.children.slice(0, tree.children.length - 1))] }; tree = tree.children[tree.children.length - 1]; } return nomAst; } // compile nominal AST into an item builder expression function function translate(ast, builtInProps, builtInFuncs) { function trans(ast) { return translate(ast, builtInProps, builtInFuncs); } if (ast.type === 'term') { switch (ast.token.type) { case 'nLit': return ast.token.value; case 'bLit': return ast.token.value; case 'sLit': return ast.token.value; case 'ident': return ast.token.value; } } else if (ast.type === 'nonterm') { switch (ast.name) { case 'expr': return trans(ast.children[0]); case 'exprList': switch (ast.prod) { case 0: return [...ast.children[0], trans(ast.children[2])]; case 1: return [trans(ast.children[0])]; } break; case 'conj': switch (ast.prod) { case 0: return new ConjTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return trans(ast.children[0]); } break; case 'disj': switch (ast.prod) { case 0: return new DisjTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return trans(ast.children[0]); } break; case 'cmpEq': switch (ast.prod) { case 0: return new EqTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return new NeqTerm(trans(ast.children[0]), trans(ast.children[2])); case 2: return new ContainsTerm(trans(ast.children[0]), trans(ast.children[2])); case 3: return trans(ast.children[0]); } break; case 'cmpRel': switch (ast.prod) { case 0: return new LeqTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return new LtTerm(trans(ast.children[0]), trans(ast.children[2])); case 2: return new GtTerm(trans(ast.children[0]), trans(ast.children[2])); case 3: return new GeqTerm(trans(ast.children[0]), trans(ast.children[2])); case 4: return trans(ast.children[0]); } break; case 'sum': switch (ast.prod) { case 0: return new AddTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return new SubTerm(trans(ast.children[0]), trans(ast.children[2])); case 2: return trans(ast.children[0]); } break; case 'prod': switch (ast.prod) { case 0: return new MulTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return new DivTerm(trans(ast.children[0]), trans(ast.children[2])); case 2: return trans(ast.children[0]); } break; case 'exp': switch (ast.prod) { case 0: return new ExpTerm(trans(ast.children[0]), trans(ast.children[2])); case 1: return trans(ast.children[0]); } break; case 'unary': switch (ast.prod) { case 0: return new NegTerm(trans(ast.children[1])); case 1: return new InvTerm(trans(ast.children[1])); case 2: return trans(ast.children[0]); } break; case 'prim': switch (ast.prod) { case 0: return new NumLitTerm(ast.children[0].token.value); case 1: return new BoolLitTerm(ast.children[0].token.value); case 2: return new StrLitTerm(ast.children[0].token.value); case 3: const fn = builtInFuncs[ast.children[0].token.id]; if (!fn) { throw new Error(`Unknown function: ${ast.children[0].token.id}`); } return new FnCallTerm(fn, trans(ast.children[2])); case 4: { const prop = builtInProps[ast.children[0].token.id]; if (!prop) { throw new Error(`Unknown property: ${ast.children[0].token.id}`); } return new PropTerm(prop); } case 5: return trans(ast.children[1]); } break; case 'args': switch (ast.prod) { case 0: return trans(ast.children[0]); case 1: return []; } break; } } } return class ExprParser { constructor(builtInProps, builtInFuncs) { this.builtInProps = builtInProps; this.builtInFuncs = builtInFuncs; } parse(exprStr) { const tokens = tokenize(exprStr); if (tokens.tokens.length <= 1) { return null; } const transAst = takeExpr(tokens, 0); if (tokens.peek.type !== 'eof') { throw new Error('Could not parse entire expression!'); } const nomAst = fixUp(transAst); return translate(nomAst, this.builtInProps, this.builtInFuncs); } }; })();