inula/packages/inula-intl/src/parser/Lexer.ts

221 lines
5.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2023 Huawei Technologies Co.,Ltd.
*
* openInula is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
import ruleUtils from '../utils/parseRuleUtils';
import { LexerInterface } from '../types/interfaces';
/**
* 词法解析器主要根据设计的规则对message进行处理成Token
*/
class Lexer<T> implements LexerInterface<T> {
readonly startState: string;
readonly unionReg: Record<string, any>;
private buffer = '';
private stack: string[] = [];
private index = 0;
private line = 1;
private col = 1;
private queuedText = '';
private state = '';
private groups: string[] = [];
private error: Record<string, any> | undefined;
private regexp: any;
private fast: Record<string, unknown> = {};
private queuedGroup: string | null = '';
private value = '';
constructor(unionReg: Record<string, any>, startState: string) {
this.startState = startState;
this.unionReg = unionReg;
this.buffer = '';
this.stack = [];
this.reset();
}
/**
* 根据新的消息重置解析器
* @param data 消息数据
*/
public reset(data?: string) {
this.buffer = data || '';
this.index = 0;
this.line = 1;
this.col = 1;
this.queuedText = '';
this.setState(this.startState);
this.stack = [];
return this;
}
private setState(state: string) {
if (!state || this.state === state) {
return;
}
this.state = state;
const info = this.unionReg[state];
this.groups = info.groups;
this.error = info.error;
this.regexp = info.regexp;
this.fast = info.fast;
}
private popState() {
this.setState(<string>this.stack.pop());
}
private pushState(state: string) {
this.stack.push(this.state);
this.setState(state);
}
private getGroup(match: Record<string, object>) {
const groupCount = this.groups.length;
for (let i = 0; i < groupCount; i++) {
if (match[i + 1] !== undefined) {
return this.groups[i];
}
}
throw new Error('No token type found matching text!');
}
private tokenToString() {
return this.value;
}
/**
* 迭代获取下一个 token
*/
public next() {
const index = this.index;
if (this.queuedGroup) {
const token = this.getToken(this.queuedGroup, this.queuedText, index);
this.queuedGroup = null;
this.queuedText = '';
return token;
}
const buffer = this.buffer;
if (index === buffer.length) {
return;
}
const fastGroup = this.fast[buffer.charCodeAt(index)];
if (fastGroup) {
return this.getToken(fastGroup, buffer.charAt(index), index);
}
// 如果没有快速匹配,那么使用预先编译的正则表达式进行匹配操作
const regexp = this.regexp;
regexp.lastIndex = index;
const match = getMatch(regexp, buffer);
const error = this.error;
if (match == null) {
return this.getToken(error, buffer.slice(index, buffer.length), index);
}
const group = this.getGroup(match);
const text = match[0];
if (error?.fallback && match.index !== index) {
this.queuedGroup = group;
this.queuedText = text;
return this.getToken(error, buffer.slice(index, match.index), index);
}
return this.getToken(group, text, index);
}
/**
* 获取Token
* @param group 解析模板后获得的属性值
* @param text 文本属性的信息
* @param offset 偏移量
* @private
*/
private getToken(group: any, text: string, offset: number) {
let lineNum = 0;
let last = 1; // 最后一个换行符的索引位置
if (group.lineBreaks) {
const matchNL = /\n/g;
if (text === '\n') {
lineNum = 1;
} else {
while (matchNL.exec(text)) {
lineNum++;
last = matchNL.lastIndex;
}
}
}
const token = {
type: (typeof group.type === 'function' && group.type(text)) || group.defaultType,
value: typeof group.value === 'function' ? group.value(text) : text,
text: text,
toString: this.tokenToString,
offset: offset, // 标记在输入 buffer 中的偏移量
lineBreaks: lineNum,
line: this.line, // token 所在的行号
col: this.col, // token 所在的列号
};
const size = text.length;
this.index += size;
this.line += lineNum;
if (lineNum !== 0) {
this.col = size - last + 1;
} else {
this.col += size;
}
if (group.shouldThrow) {
throw new Error('Invalid Syntax!');
}
if (group.pop) {
this.popState();
} else if (group.push) {
this.pushState(group.push);
} else if (group.next) {
this.setState(group.next);
}
return token;
}
// 增加迭代器,允许逐个访问集合中的元素方法
[Symbol.iterator]() {
return {
next: (): IteratorResult<T> => {
const token = this.next();
return { value: token, done: !token } as IteratorResult<T>;
},
};
}
}
/**
* 根据正则表达式获取匹配到message的值
* 索引为 0 的元素是完整的匹配结果。
* 索引为 1、2、3 等的元素是正则表达式中指定的捕获组的匹配结果。
*/
const getMatch = ruleUtils.checkSticky()
? // 正则表达式具有 sticky 标志
(regexp: any, buffer: string) => regexp.exec(buffer)
: // 正则表达式具有 global 标志,匹配的字符串长度为 0则表示匹配失败
(regexp: any, buffer: string) => (regexp.exec(buffer)[0].length === 0 ? null : regexp.exec(buffer));
export default Lexer;