inula/packages/inula-intl/src/parser/parseMappingRule.ts

243 lines
7.5 KiB
TypeScript

/*
* Copyright (c) 2023 Huawei Technologies Co.,Ltd.
*
* openInula is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
import Lexer from './Lexer';
import { mappingRule } from './mappingRule';
import ruleUtils from '../utils/parseRuleUtils';
import { RawToken } from '../types/types';
// Error rule that parseRules falls back to when a state declares neither an
// `error` nor a `fallback` rule of its own.
const defaultErrorRule = ruleUtils.getRuleOptions('error', { lineBreaks: true, shouldThrow: true });
/**
 * Compiles the rules of one state into the data structure the lexer matches against.
 *
 * Leading single-character string matches are registered in a char-code lookup
 * table (`fast`) for as long as fast matching is still allowed. The patterns that
 * remain are validated and joined into one combined regular expression.
 *
 * @param rules - Rule option objects of a single state, indexed like an array.
 * @param hasStates - Whether the lexer is stateful; forwarded to `checkStateOptions`
 *   for rules that carry `pop`, `push` or `next`.
 * @returns An object with `regexp` (the combined expression), `groups` (the rules
 *   that contributed a pattern, in order), `fast` (char code -> rule) and `error`
 *   (the rule flagged `error`/`fallback`, or the default error rule).
 * @throws Error when a rule still carries `include`, or when a rule's pattern
 *   matches the empty string, uses capture groups, or matches a newline without
 *   `lineBreaks` being set. The helper checks may throw as well.
 */
function parseRules(rules: Record<string, any>, hasStates: boolean): Record<string, any> {
  const fast: Record<string, unknown> = {};
  const groups: Record<string, any>[] = [];
  const parts: string[] = [];
  let errorRule: Record<string, any> | null = null;
  let unicodeFlag: boolean | null = null;
  // A fallback rule anywhere in the state disables fast matching from the start.
  let fastAllowed = isExistsFallback(rules, true);

  for (let index = 0; index < rules.length; index++) {
    const rule = rules[index];
    if (rule.include) {
      throw new Error('Inheritance is not allowed in stateless lexers!');
    }
    errorRule = isOptionsErrorOrFallback(rule, errorRule);

    // Work on a copy: processFast removes entries from the front of the list.
    const patterns = rule.match.slice();
    if (fastAllowed) {
      // Single-character matches go into the `fast` table instead of the regex.
      processFast(patterns, fast, rule);
    }

    // State-switching options need extra validation.
    if (rule.pop || rule.push || rule.next) {
      checkStateOptions(hasStates, rule);
    }

    // Only rules that still have patterns take part in the combined expression.
    if (patterns.length === 0) {
      continue;
    }

    // Once a rule lands in the regex, later rules may no longer use the fast table.
    fastAllowed = false;
    groups.push(rule);

    // The unicode flag has to be used by every regex or by none.
    unicodeFlag = checkUnicode(patterns, unicodeFlag, rule);

    const source = ruleUtils.getRegUnion(patterns.map(ruleUtils.getReg));
    const probe = new RegExp(source);
    if (probe.test('')) {
      throw new Error('The regex matched the empty string!');
    }
    if (ruleUtils.getRegGroups(source) > 0) {
      throw new Error('The regular expression uses capture groups, use (?: … ) instead!');
    }
    // A pattern that can match a newline must be declared with `lineBreaks`.
    if (!rule.lineBreaks && probe.test('\n')) {
      throw new Error('The matching rule must contain lineBreaks.');
    }
    parts.push(ruleUtils.getRegCapture(source));
  }

  // Without a fallback rule the sticky flag is used so that matching only happens at
  // the current index. When sticky is not supported, it is emulated by appending an
  // empty alternative that always matches.
  const fallbackRule = errorRule && errorRule.fallback;
  const baseFlags = ruleUtils.checkSticky() && !fallbackRule ? 'ym' : 'gm';
  const suffix = ruleUtils.checkSticky() || fallbackRule ? '' : '|';
  const flags = unicodeFlag === true ? baseFlags + 'u' : baseFlags;

  return {
    regexp: new RegExp(ruleUtils.getRegUnion(parts) + suffix, flags),
    groups,
    fast,
    error: errorRule || defaultErrorRule,
  };
}
/**
 * Validates the state-transition options (`push`, `next`, `pop`) of one compiled rule.
 *
 * @param group - The rule to check; a falsy value is accepted and ignored.
 * @param name - Name of the state the rule belongs to (currently not used in the messages).
 * @param map - Table of all compiled states, keyed by state name.
 * @throws Error when `push`/`next` names a state that is not an own, truthy entry of
 *   `map`, or when `pop` is set to anything that does not convert to the number 1.
 */
export function checkStateGroup(group: Record<string, any>, name: string, map: Record<string, any>) {
  if (!group) {
    return;
  }
  const state = group.push || group.next;
  // Require an own property: on a plain-object map, names such as `constructor` or
  // `toString` would otherwise resolve through Object.prototype and be accepted as
  // existing states.
  if (state && (!Object.prototype.hasOwnProperty.call(map, state) || !map[state])) {
    throw new Error('The state is missing.');
  }
  if (group.pop && +group.pop !== 1) {
    throw new Error('The value of pop must be 1.');
  }
}
/**
 * Builds a stateful lexer from a table of per-state rules.
 *
 * Every own property of `mappingRule` names a state and holds that state's rules.
 * `include` entries are expanded in place with the rules of the referenced state,
 * each state is compiled with `parseRules`, and the state-transition options of the
 * compiled rules are validated before the lexer is created.
 *
 * @param mappingRule - State name -> rule definitions (normalized via `ruleUtils.getRules`).
 * @param startState - State the lexer starts in; defaults to the first state of `mappingRule`.
 * @returns A lexer over the compiled states.
 * @throws Error when an `include` names a state that does not exist; rule compilation
 *   and state validation may throw as well.
 */
function parseMappingRule(mappingRule: Record<string, any>, startState?: string): Lexer<RawToken> {
  const keys = Object.getOwnPropertyNames(mappingRule);
  const initialState = startState || keys[0];

  // Normalize the rules of every state. The table has no prototype so that a name
  // such as `constructor` or `toString` is never mistaken for an existing state.
  const ruleMap: Record<string, any> = Object.create(null);
  keys.forEach(key => {
    ruleMap[key] = ruleUtils.getRules(mappingRule[key]);
  });

  // Expand `include` entries: each one is replaced by the rules of the named state.
  for (let i = 0; i < keys.length; i++) {
    const key = keys[i];
    const rules = ruleMap[key];
    // States already pulled into this one; prototype-free for the same reason as above.
    const included: Record<string, boolean> = Object.create(null);
    for (let j = 0; j < rules.length; j++) {
      const rule = rules[j];
      if (!rule.include) {
        continue;
      }
      const replacement: any[] = [];
      // Self-includes and repeated includes are dropped without replacement.
      if (rule.include !== key && !included[rule.include]) {
        included[rule.include] = true;
        const newRules = ruleMap[rule.include];
        if (!newRules) {
          throw new Error('Cannot contain a state that does not exist!');
        }
        newRules.forEach(newRule => {
          // Skip rules this state already contains.
          if (!rules.includes(newRule)) {
            replacement.push(newRule);
          }
        });
      }
      rules.splice(j, 1, ...replacement);
      // Revisit the same index: it now holds the first inserted rule (which may be an
      // include itself) or the rule that followed the removed entry.
      j--;
    }
  }

  // Compile every state into the structure the lexer works with.
  const mappingAllRules: Record<string, any> = {};
  keys.forEach(key => {
    mappingAllRules[key] = parseRules(ruleMap[key], true);
  });

  // Verify that the state transitions of all compiled rules point at known states.
  keys.forEach(name => {
    const state = mappingAllRules[name];
    state.groups.forEach(group => {
      checkStateGroup(group, name, mappingAllRules);
    });
    Object.getOwnPropertyNames(state.fast).forEach(fastKey => {
      checkStateGroup(state.fast[fastKey], name, mappingAllRules);
    });
  });

  return new Lexer(mappingAllRules, initialState);
}
/**
 * Moves the leading single-character string matches of a rule into the fast table.
 *
 * Starting at the front of `match`, every entry that is a one-character string is
 * registered in `fast` under its char code and removed from `match`. Processing
 * stops at the first entry that is not such a string.
 *
 * @param match - Match list of the rule; consumed entries are removed in place.
 * @param fast - Lookup table from char code to rule options.
 * @param options - The rule the matches belong to.
 */
function processFast(match, fast: Record<string, unknown>, options) {
  let consumed = 0;
  while (consumed < match.length) {
    const candidate = match[consumed];
    if (typeof candidate !== 'string' || candidate.length !== 1) {
      break;
    }
    fast[candidate.charCodeAt(0)] = options;
    consumed += 1;
  }
  // Drop everything that went into the fast table.
  match.splice(0, consumed);
}
/**
 * Reports a conflict between a newly found error/fallback rule and the one already
 * recorded. This function always throws.
 *
 * @param options - The rule that was just encountered.
 * @param errorRule - The error/fallback rule recorded earlier.
 * @throws Error with one message when both rules agree on being (or not being) a
 *   fallback rule, and with another message when they differ.
 */
function handleErrorRule(options, errorRule: Record<string, any>) {
  const sameKind = Boolean(options.fallback) === Boolean(errorRule.fallback);
  const message = sameKind
    ? 'errorRule can only set one!'
    : 'fallback and error cannot be set at the same time!';
  throw new Error(message);
}
/**
 * Tracks whether the regular expressions of a state use the `u` flag consistently.
 *
 * Entries of `match` that `ruleUtils.checkRegExp` does not accept are ignored. The
 * first accepted entry fixes the flag when none has been recorded yet; later entries
 * are compared against it.
 *
 * @param match - Match list of the current rule.
 * @param unicodeFlag - Flag recorded so far, or `null` when no regex has been seen.
 * @param options - The rule the matches belong to.
 * @returns The (possibly newly recorded) unicode flag.
 * @throws Error when an entry disagrees with the recorded flag and the rule's
 *   `fallback` option is exactly `false`.
 */
function checkUnicode(match, unicodeFlag, options) {
  let recorded = unicodeFlag;
  for (const candidate of match) {
    if (ruleUtils.checkRegExp(candidate)) {
      if (recorded === null) {
        recorded = candidate.unicode;
      } else if (recorded !== candidate.unicode && options.fallback === false) {
        throw new Error('If the /u flag is used, all!');
      }
    }
  }
  return recorded;
}
/**
 * Validates a rule that carries state-switching options.
 *
 * @param hasStates - Whether the lexer being built is stateful.
 * @param options - The rule to validate; only read when `hasStates` is true.
 * @throws Error when the lexer is stateless, or when the rule is a fallback rule.
 */
function checkStateOptions(hasStates: boolean, options) {
  if (hasStates) {
    if (options.fallback) {
      throw new Error('State toggle options are not allowed on fallback tokens!');
    }
  } else {
    throw new Error('State toggle options are not allowed in stateless tokenizers!');
  }
}
/**
 * Decides whether fast matching may stay enabled for a set of rules.
 *
 * @param rules - Rule option objects, indexed like an array.
 * @param enableFast - The current setting.
 * @returns `false` as soon as any rule has a truthy `fallback`; otherwise `enableFast`
 *   unchanged.
 */
function isExistsFallback(rules: Record<string, any>, enableFast: boolean) {
  let fastStillAllowed = enableFast;
  let index = 0;
  // Every rule is visited, also after a fallback rule has been found.
  while (index < rules.length) {
    const { fallback } = rules[index];
    fastStillAllowed = fallback ? false : fastStillAllowed;
    index += 1;
  }
  return fastStillAllowed;
}
/**
 * Updates the recorded error rule for the rule currently being processed.
 *
 * @param options - The rule being processed.
 * @param errorRule - The error/fallback rule recorded so far, or `null`.
 * @returns `options` when it is flagged `error` or `fallback`; otherwise the
 *   previously recorded rule unchanged.
 * @throws Error (via `handleErrorRule`) when `options` is an error/fallback rule and
 *   one has already been recorded, since only one such rule is allowed.
 */
function isOptionsErrorOrFallback(options, errorRule: Record<string, any> | null) {
  const declaresErrorRule = options.error || options.fallback;
  if (!declaresErrorRule) {
    return errorRule;
  }
  // Only a single error/fallback rule may be set.
  if (errorRule) {
    handleErrorRule(options, errorRule);
  }
  return options;
}
// Lexer instance built at module load from the imported `mappingRule`. No start state
// is passed, so parseMappingRule uses the first state of `mappingRule`.
export const lexer = parseMappingRule(mappingRule);
export default parseMappingRule;