跳到主要内容

手写简易编译器

问题

如何从零实现一个简易编译器/解释器?手写四则运算编译器、JSON Parser 和模板编译器。

答案

1. 四则运算解释器

实现一个支持 +, -, *, /, () 和运算符优先级的计算器。

完整实现

// ==================== 类型定义 ====================
type TokenType = 'Number' | 'Operator' | 'LParen' | 'RParen' | 'EOF';

interface Token {
type: TokenType;
value: string;
}

type ASTNode = NumberNode | BinaryNode | UnaryNode;

interface NumberNode {
type: 'NumberLiteral';
value: number;
}

interface BinaryNode {
type: 'BinaryExpression';
operator: string;
left: ASTNode;
right: ASTNode;
}

interface UnaryNode {
type: 'UnaryExpression';
operator: string;
argument: ASTNode;
}

// ==================== 词法分析 ====================
function tokenize(input: string): Token[] {
const tokens: Token[] = [];
let i = 0;

while (i < input.length) {
const char = input[i];

// 跳过空白
if (/\s/.test(char)) { i++; continue; }

// 数字(含小数)
if (/[0-9]/.test(char)) {
let num = '';
while (i < input.length && /[0-9.]/.test(input[i])) {
num += input[i++];
}
tokens.push({ type: 'Number', value: num });
continue;
}

// 运算符
if ('+-*/'.includes(char)) {
tokens.push({ type: 'Operator', value: char });
i++;
continue;
}

// 括号
if (char === '(') { tokens.push({ type: 'LParen', value: '(' }); i++; continue; }
if (char === ')') { tokens.push({ type: 'RParen', value: ')' }); i++; continue; }

throw new SyntaxError(`Unexpected character: '${char}' at position ${i}`);
}

tokens.push({ type: 'EOF', value: '' });
return tokens;
}

// ==================== 语法分析(递归下降) ====================
// 语法规则(BNF):
// expression = term (('+' | '-') term)*
// term = unary (('*' | '/') unary)*
// unary = ('-' unary) | primary
// primary = NUMBER | '(' expression ')'

class Parser {
private tokens: Token[];
private pos = 0;

constructor(tokens: Token[]) {
this.tokens = tokens;
}

private peek(): Token {
return this.tokens[this.pos];
}

private advance(): Token {
const token = this.tokens[this.pos];
this.pos++;
return token;
}

private expect(type: TokenType): Token {
const token = this.peek();
if (token.type !== type) {
throw new SyntaxError(`Expected ${type} but got ${token.type}`);
}
return this.advance();
}

// expression = term (('+' | '-') term)*
parseExpression(): ASTNode {
let left = this.parseTerm();

while (
this.peek().type === 'Operator' &&
(this.peek().value === '+' || this.peek().value === '-')
) {
const op = this.advance().value;
const right = this.parseTerm();
left = { type: 'BinaryExpression', operator: op, left, right };
}

return left;
}

// term = unary (('*' | '/') unary)*
private parseTerm(): ASTNode {
let left = this.parseUnary();

while (
this.peek().type === 'Operator' &&
(this.peek().value === '*' || this.peek().value === '/')
) {
const op = this.advance().value;
const right = this.parseUnary();
left = { type: 'BinaryExpression', operator: op, left, right };
}

return left;
}

// unary = ('-' unary) | primary
private parseUnary(): ASTNode {
if (this.peek().type === 'Operator' && this.peek().value === '-') {
this.advance();
const argument = this.parseUnary();
return { type: 'UnaryExpression', operator: '-', argument };
}
return this.parsePrimary();
}

// primary = NUMBER | '(' expression ')'
private parsePrimary(): ASTNode {
if (this.peek().type === 'Number') {
return { type: 'NumberLiteral', value: Number(this.advance().value) };
}

if (this.peek().type === 'LParen') {
this.advance(); // 跳过 '('
const expr = this.parseExpression();
this.expect('RParen'); // 跳过 ')'
return expr;
}

throw new SyntaxError(`Unexpected token: ${this.peek().value}`);
}
}

// ==================== 求值(解释器) ====================
function evaluate(node: ASTNode): number {
switch (node.type) {
case 'NumberLiteral':
return node.value;

case 'UnaryExpression':
if (node.operator === '-') return -evaluate(node.argument);
throw new Error(`Unknown unary operator: ${node.operator}`);

case 'BinaryExpression': {
const left = evaluate(node.left);
const right = evaluate(node.right);
switch (node.operator) {
case '+': return left + right;
case '-': return left - right;
case '*': return left * right;
case '/':
if (right === 0) throw new Error('Division by zero');
return left / right;
default:
throw new Error(`Unknown operator: ${node.operator}`);
}
}
}
}

// ==================== 使用 ====================
function calc(expression: string): number {
const tokens = tokenize(expression);
const parser = new Parser(tokens);
const ast = parser.parseExpression();
return evaluate(ast);
}

// 测试
console.log(calc('1 + 2')); // 3
console.log(calc('2 + 3 * 4')); // 14 (优先级正确)
console.log(calc('(2 + 3) * 4')); // 20
console.log(calc('-3 + 4')); // 1
console.log(calc('10 / (5 - 3)')); // 5
运算符优先级

通过语法规则的层级实现优先级:expressiontermunaryprimary。 层级越深,优先级越高。*/term 层处理,优先于 expression 层的 +-

2. 手写 JSON Parser

实现一个支持完整 JSON 语法的解析器:

type JSONValue = string | number | boolean | null | JSONValue[] | JSONObject;
interface JSONObject { [key: string]: JSONValue; }

class JSONParser {
private input: string;
private pos = 0;

constructor(input: string) {
this.input = input;
}

parse(): JSONValue {
this.skipWhitespace();
const value = this.parseValue();
this.skipWhitespace();
if (this.pos < this.input.length) {
throw new SyntaxError(`Unexpected character at position ${this.pos}`);
}
return value;
}

private parseValue(): JSONValue {
const char = this.input[this.pos];

if (char === '"') return this.parseString();
if (char === '{') return this.parseObject();
if (char === '[') return this.parseArray();
if (char === 't' || char === 'f') return this.parseBoolean();
if (char === 'n') return this.parseNull();
if (char === '-' || /[0-9]/.test(char)) return this.parseNumber();

throw new SyntaxError(`Unexpected character '${char}' at position ${this.pos}`);
}

// 解析字符串
private parseString(): string {
this.expect('"');
let result = '';

while (this.pos < this.input.length && this.input[this.pos] !== '"') {
if (this.input[this.pos] === '\\') {
this.pos++; // 跳过反斜杠
const escaped = this.input[this.pos];
const escapeMap: Record<string, string> = {
'"': '"', '\\': '\\', '/': '/',
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
};
if (escaped === 'u') {
// Unicode 转义:\uXXXX
const hex = this.input.slice(this.pos + 1, this.pos + 5);
result += String.fromCharCode(parseInt(hex, 16));
this.pos += 5;
continue;
}
result += escapeMap[escaped] ?? escaped;
} else {
result += this.input[this.pos];
}
this.pos++;
}

this.expect('"');
return result;
}

// 解析数字
private parseNumber(): number {
let numStr = '';

// 负号
if (this.input[this.pos] === '-') numStr += this.input[this.pos++];

// 整数部分
while (this.pos < this.input.length && /[0-9]/.test(this.input[this.pos])) {
numStr += this.input[this.pos++];
}

// 小数部分
if (this.input[this.pos] === '.') {
numStr += this.input[this.pos++];
while (this.pos < this.input.length && /[0-9]/.test(this.input[this.pos])) {
numStr += this.input[this.pos++];
}
}

// 指数部分
if (this.input[this.pos] === 'e' || this.input[this.pos] === 'E') {
numStr += this.input[this.pos++];
if (this.input[this.pos] === '+' || this.input[this.pos] === '-') {
numStr += this.input[this.pos++];
}
while (this.pos < this.input.length && /[0-9]/.test(this.input[this.pos])) {
numStr += this.input[this.pos++];
}
}

return Number(numStr);
}

// 解析对象
private parseObject(): JSONObject {
this.expect('{');
this.skipWhitespace();

const obj: JSONObject = {};

if (this.input[this.pos] !== '}') {
// 解析第一个键值对
this.skipWhitespace();
const key = this.parseString();
this.skipWhitespace();
this.expect(':');
this.skipWhitespace();
obj[key] = this.parseValue();

// 解析后续键值对
while (this.input[this.pos] !== '}') {
this.skipWhitespace();
this.expect(',');
this.skipWhitespace();
const key = this.parseString();
this.skipWhitespace();
this.expect(':');
this.skipWhitespace();
obj[key] = this.parseValue();
}
}

this.expect('}');
return obj;
}

// 解析数组
private parseArray(): JSONValue[] {
this.expect('[');
this.skipWhitespace();

const arr: JSONValue[] = [];

if (this.input[this.pos] !== ']') {
this.skipWhitespace();
arr.push(this.parseValue());

while (this.input[this.pos] !== ']') {
this.skipWhitespace();
this.expect(',');
this.skipWhitespace();
arr.push(this.parseValue());
}
}

this.expect(']');
return arr;
}

// 解析布尔值
private parseBoolean(): boolean {
if (this.input.startsWith('true', this.pos)) {
this.pos += 4;
return true;
}
if (this.input.startsWith('false', this.pos)) {
this.pos += 5;
return false;
}
throw new SyntaxError(`Unexpected token at position ${this.pos}`);
}

// 解析 null
private parseNull(): null {
if (this.input.startsWith('null', this.pos)) {
this.pos += 4;
return null;
}
throw new SyntaxError(`Unexpected token at position ${this.pos}`);
}

private skipWhitespace(): void {
while (this.pos < this.input.length && /\s/.test(this.input[this.pos])) {
this.pos++;
}
}

private expect(char: string): void {
if (this.input[this.pos] !== char) {
throw new SyntaxError(
`Expected '${char}' but got '${this.input[this.pos]}' at position ${this.pos}`
);
}
this.pos++;
}
}

// 使用
const parser = new JSONParser('{"name": "Alice", "age": 25, "hobbies": ["reading", "coding"]}');
console.log(parser.parse());
// { name: 'Alice', age: 25, hobbies: ['reading', 'coding'] }

3. 简易模板编译器

实现类似 Vue 的 {{ expression }} 模板编译:

// ==================== 类型定义 ====================
type TemplateNode = TextNode | ExpressionNode;

interface TextNode {
type: 'Text';
value: string;
}

interface ExpressionNode {
type: 'Expression';
expression: string; // 原始表达式
}

// ==================== 解析模板 ====================
function parseTemplate(template: string): TemplateNode[] {
const nodes: TemplateNode[] = [];
let current = 0;

while (current < template.length) {
// 查找 {{
const openIndex = template.indexOf('{{', current);

if (openIndex === -1) {
// 没有更多表达式,剩余都是文本
nodes.push({ type: 'Text', value: template.slice(current) });
break;
}

// {{ 之前的文本
if (openIndex > current) {
nodes.push({ type: 'Text', value: template.slice(current, openIndex) });
}

// 查找 }}
const closeIndex = template.indexOf('}}', openIndex);
if (closeIndex === -1) {
throw new SyntaxError('Unclosed expression: missing }}');
}

// 提取表达式
const expression = template.slice(openIndex + 2, closeIndex).trim();
nodes.push({ type: 'Expression', expression });

current = closeIndex + 2;
}

return nodes;
}

// ==================== 代码生成 ====================
function compileToFunction(template: string): (data: Record<string, unknown>) => string {
const nodes = parseTemplate(template);

// 生成渲染函数代码
const parts = nodes.map((node) => {
if (node.type === 'Text') {
return JSON.stringify(node.value);
}
// Expression 节点 → 访问 data 上的属性
return `_s(${node.expression})`;
});

const code = `
with (data) {
return ${parts.join(' + ')};
}
`;

// _s: toString 辅助函数,处理 null/undefined
const _s = (val: unknown): string => {
if (val == null) return '';
if (typeof val === 'object') return JSON.stringify(val);
return String(val);
};

// 创建渲染函数
const renderFn = new Function('data', '_s', code) as (
data: Record<string, unknown>,
_s: typeof _s
) => string;

return (data: Record<string, unknown>) => renderFn(data, _s);
}

// ==================== 使用 ====================
const render = compileToFunction(
'Hello, {{ name }}! You are {{ age }} years old. {{ age >= 18 ? "Adult" : "Minor" }}'
);

console.log(render({ name: 'Alice', age: 25 }));
// 'Hello, Alice! You are 25 years old. Adult'

console.log(render({ name: 'Bob', age: 12 }));
// 'Hello, Bob! You are 12 years old. Minor'

进阶版:支持过滤器(Filter)

// 语法:{{ expression | filter1 | filter2(arg) }}
interface FilteredExpression {
expression: string;
filters: Array<{ name: string; args: string[] }>;
}

function parseFilters(raw: string): FilteredExpression {
const parts = raw.split('|').map((s) => s.trim());
const expression = parts[0];

const filters = parts.slice(1).map((filter) => {
const match = filter.match(/^(\w+)(?:\((.*)\))?$/);
if (!match) throw new SyntaxError(`Invalid filter: ${filter}`);
return {
name: match[1],
args: match[2] ? match[2].split(',').map((a) => a.trim()) : [],
};
});

return { expression, filters };
}

// 内置过滤器
const builtinFilters: Record<string, (...args: unknown[]) => unknown> = {
uppercase: (val: unknown) => String(val).toUpperCase(),
lowercase: (val: unknown) => String(val).toLowerCase(),
truncate: (val: unknown, len: unknown) => {
const str = String(val);
const maxLen = Number(len) || 20;
return str.length > maxLen ? str.slice(0, maxLen) + '...' : str;
},
currency: (val: unknown, symbol: unknown) => {
return `${symbol || '$'}${Number(val).toFixed(2)}`;
},
};

function compileWithFilters(template: string): (data: Record<string, unknown>) => string {
const nodes = parseTemplate(template);

return (data: Record<string, unknown>) => {
return nodes
.map((node) => {
if (node.type === 'Text') return node.value;

const { expression, filters } = parseFilters(node.expression);

// 使用 Function 求值(简化实现)
let value: unknown = new Function('data', `with(data) { return ${expression}; }`)(data);

// 依次应用过滤器
for (const filter of filters) {
const fn = builtinFilters[filter.name];
if (!fn) throw new Error(`Unknown filter: ${filter.name}`);
value = fn(value, ...filter.args);
}

return value == null ? '' : String(value);
})
.join('');
};
}

// 使用
const render2 = compileWithFilters(
'{{ name | uppercase }}: {{ price | currency(¥) }}, {{ desc | truncate(10) }}'
);

console.log(render2({ name: 'iPhone', price: 6999, desc: '这是一款非常好用的手机' }));
// 'IPHONE: ¥6999.00, 这是一款非常好用的手...'

4. 设计要点总结

阶段要点
词法分析处理空白、字符串转义、多字符运算符
语法分析运算符优先级(层级规则)、错误恢复
求值/生成类型安全、边界处理(除零、null)

常见面试问题

Q1: 递归下降解析如何处理运算符优先级?

答案

通过语法规则的分层实现。每一层只处理同优先级的运算符:

expression = term (('+' | '-') term)*     ← 低优先级
term = factor (('*' | '/') factor)* ← 高优先级
factor = NUMBER | '(' expression ')' ← 最高(括号)
  • 先解析子节点(高优先级),再组合(低优先级)
  • 2 + 3 * 4 先解析 3 * 4 得到节点,再和 2+
  • 括号通过递归回到顶层规则,强制提升优先级

Q2: 如何处理左递归问题?

答案

直接左递归(如 expr → expr + term)会导致递归下降陷入无限递归。解决方案是改写为循环:

// ❌ 左递归写法(无限递归)
function parseExpr(): ASTNode {
const left = parseExpr(); // 无限递归!
expect('+');
const right = parseTerm();
return binary('+', left, right);
}

// ✅ 改写为循环
function parseExpr(): ASTNode {
let left = parseTerm();
while (peek().value === '+') {
advance();
const right = parseTerm();
left = { type: 'BinaryExpression', operator: '+', left, right };
}
return left;
}

Q3: 手写 JSON Parser 需要注意什么?

答案

  1. 字符串转义:处理 \", \\, \/, \n, \t, \uXXXX
  2. 数字格式:负号、小数、科学计数法(1.5e10
  3. 递归结构:对象和数组可嵌套
  4. 空白处理:键值对之间的空白需跳过
  5. 错误信息:提供有意义的位置信息

Q4: Vue 模板编译和直接字符串拼接有什么区别?

答案

Vue 模板编译器做了远比字符串替换更多的事情:

步骤作用
ParseHTML → AST(tag、attribute、directive、事件等)
Transform静态标记(PatchFlag)、静态节点提升、事件缓存
GenerateAST → 渲染函数(createVNode 调用)

关键优化(Vue 3):

  • 静态提升:纯静态节点只创建一次
  • PatchFlag:标记动态部分,Diff 时只比较变化的属性
  • Block Tree:减少 Diff 范围
  • 缓存事件处理器:避免子组件不必要的更新

Q5: 编译器和解释器的区别?

答案

维度编译器解释器
执行方式先全部编译为目标代码,再执行逐行/逐节点解释执行
产物生成目标代码(机器码/字节码/JS)不生成代码,直接求值
执行速度快(已优化)相对慢
启动速度慢(需编译过程)快(直接开始执行)
前端示例Babel、TypeScript、Vue 模板编译器本文的四则运算 evaluate

实际上,V8 引擎结合了两者:先用解释器(Ignition)快速启动执行,再用编译器(TurboFan)优化热点函数。

相关链接