LMAssembly // Implementing InputToken support.
This commit is contained in:
parent
87f7328636
commit
c5ce9199bd
|
@ -0,0 +1,247 @@
|
|||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// ====================
|
||||
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
|
||||
// ... with NTL restriction stating that:
|
||||
// No trademark license is granted to use the trade names, trademarks, service
|
||||
// marks, or product names of Contributor, except as required to fulfill notice
|
||||
// requirements defined in MIT License.
|
||||
|
||||
import Foundation
|
||||
|
||||
/// 工作原理:先用 InputToken.parse 分析原始字串,給出準確的 Token。
|
||||
/// 然後再讓這個 Token 用 .translated() 自我表述出轉換結果。
|
||||
|
||||
public enum InputToken {
|
||||
case timeZone(shortened: Bool)
|
||||
case timeNow(shortened: Bool)
|
||||
case date(dayDelta: Int = 0, yearDelta: Int = 0, shortened: Bool = true, luna: Bool = false)
|
||||
case week(dayDelta: Int = 0, shortened: Bool = true)
|
||||
case year(yearDelta: Int = 0)
|
||||
case yearGanzhi(yearDelta: Int = 0)
|
||||
case yearZodiac(yearDelta: Int = 0)
|
||||
}
|
||||
|
||||
// MARK: - 正式對外投入使用的 API。
|
||||
|
||||
public extension String {
|
||||
func parseAsInputToken(isCHS: Bool) -> [String] {
|
||||
InputToken.parse(from: self).map { $0.translated(isCHS: isCHS) }.flatMap { $0 }.deduplicated
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Parser parsing raw token value to construct token.
|
||||
|
||||
public extension InputToken {
|
||||
static func parse(from rawToken: String) -> [InputToken] {
|
||||
var result: [InputToken] = []
|
||||
guard rawToken.prefix(6) == "MACRO@" else { return result }
|
||||
var mapParams: [String: Int] = [:]
|
||||
let tokenComponents = rawToken.dropFirst(6).split(separator: "_").map { param in
|
||||
let result = param.uppercased()
|
||||
let kvPair = param.split(separator: ":")
|
||||
guard kvPair.count == 2 else { return result }
|
||||
guard let pairValue = Int(kvPair[1]) else { return result }
|
||||
mapParams[kvPair[0].description] = pairValue
|
||||
return result
|
||||
}
|
||||
guard !tokenComponents.isEmpty else { return result }
|
||||
// 準備接收參數。
|
||||
let dayDelta: Int = mapParams["dayDelta".uppercased()] ?? 0
|
||||
let yearDelta: Int = mapParams["yearDelta".uppercased()] ?? 0
|
||||
let shortened: Bool = tokenComponents.contains("SHORTENED")
|
||||
let hasZodiac: Bool = tokenComponents.contains("ZODIAC")
|
||||
let hasGanzhi: Bool = tokenComponents.contains("GANZHI")
|
||||
let hasLuna: Bool = tokenComponents.contains("LUNA")
|
||||
|
||||
switch tokenComponents[0] {
|
||||
case "TIMEZONE": result.append(.timeZone(shortened: shortened))
|
||||
case "TIME": result.append(.timeNow(shortened: shortened))
|
||||
case "DATE": result.append(.date(dayDelta: dayDelta, yearDelta: yearDelta, shortened: shortened, luna: hasLuna))
|
||||
case "WEEK": result.append(.week(dayDelta: dayDelta, shortened: shortened))
|
||||
case "YEAR": result.append(.year(yearDelta: yearDelta)) // 始終插入公曆年,方便對比參考。
|
||||
if hasZodiac { result.append(.yearZodiac(yearDelta: yearDelta)) }
|
||||
if hasGanzhi { result.append(.yearGanzhi(yearDelta: yearDelta)) }
|
||||
default: break
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Parser parsing token itself.
|
||||
|
||||
public extension InputToken {
|
||||
func translated(isCHS: Bool) -> [String] {
|
||||
let locale = Locale(identifier: isCHS ? "zh-Hans" : "zh-Hant-TW")
|
||||
let formatter = DateFormatter()
|
||||
formatter.locale = locale
|
||||
let currentDate = Date()
|
||||
var dateToDescribe = currentDate // 接下來會針對給定參數修正這個資料值。
|
||||
var results: [String] = []
|
||||
|
||||
/// 內部函式,用來修正 dateToDescribe 自身的參數值。
|
||||
func applyDelta(for type: Calendar.Component, delta deltaValue: Int) {
|
||||
switch type {
|
||||
case .year:
|
||||
var delta = DateComponents()
|
||||
let thisYear = Calendar.current.dateComponents([.year], from: currentDate).year ?? 2018
|
||||
delta.year = max(deltaValue, thisYear * -1)
|
||||
dateToDescribe = Calendar.current.date(byAdding: delta, to: currentDate) ?? currentDate
|
||||
case .day:
|
||||
let dayLength = 60 * 60 * 24
|
||||
dateToDescribe = dateToDescribe.addingTimeInterval(Double(dayLength * deltaValue))
|
||||
default: break
|
||||
}
|
||||
}
|
||||
|
||||
// 計算結果。
|
||||
switch self {
|
||||
case let .timeZone(shortened): // 時區
|
||||
let resultToAdd = TimeZone.current.localizedName(
|
||||
for: shortened ? .shortGeneric : .standard, locale: locale
|
||||
) ?? TimeZone.current.description
|
||||
results.append(resultToAdd)
|
||||
case let .timeNow(shortened): // 當前時間
|
||||
var formats = [String]()
|
||||
switch (isCHS, shortened) {
|
||||
case (false, true): formats.append(contentsOf: ["HH:mm", "HH點mm分", "HH時mm分"])
|
||||
case (false, false): formats.append(contentsOf: ["HH:mm:ss", "HH點mm分ss秒", "HH時mm分ss秒"])
|
||||
case (true, true): formats.append(contentsOf: ["HH:mm", "HH点mm分", "HH时mm分"])
|
||||
case (true, false): formats.append(contentsOf: ["HH:mm:ss", "HH点mm分ss秒", "HH时mm分ss秒"])
|
||||
}
|
||||
formats.forEach { formatString in
|
||||
formatter.dateFormat = formatString
|
||||
results.append(formatter.string(from: dateToDescribe))
|
||||
}
|
||||
let resultsExtra: [String] = results.compactMap {
|
||||
guard !$0.contains(":") else { return nil }
|
||||
var newResult = $0
|
||||
if newResult.first == "0" { newResult = newResult.dropFirst().description }
|
||||
if newResult.prefix(2) == "2点" || newResult.prefix(2) == "2點" {
|
||||
newResult = (isCHS ? "两点" : "兩點") + newResult.dropFirst(2).description
|
||||
}
|
||||
newResult = newResult.convertArabicNumeralsToChinese(onlyDigits: false)
|
||||
newResult = newResult.replacingOccurrences(of: "〇", with: "零")
|
||||
return newResult
|
||||
}
|
||||
results.append(contentsOf: resultsExtra)
|
||||
case let .date(dayDelta, yearDelta, shortened, hasLuna): // 日期
|
||||
applyDelta(for: .year, delta: yearDelta)
|
||||
applyDelta(for: .day, delta: dayDelta)
|
||||
// 農曆單獨處理。
|
||||
guard !hasLuna else {
|
||||
formatter.calendar = .init(identifier: .chinese)
|
||||
formatter.dateStyle = .medium
|
||||
formatter.dateFormat = "MMMd"
|
||||
let dateString = formatter.string(from: dateToDescribe)
|
||||
formatter.dateFormat = "U"
|
||||
let yearGanzhi = formatter.string(from: dateToDescribe)
|
||||
results.append("\(yearGanzhi)年\(dateString)")
|
||||
if let yearZodiac = mapGanzhiToZodiac[yearGanzhi] {
|
||||
results.append("\(isCHS ? yearZodiac.1 : yearZodiac.0)年\(dateString)")
|
||||
}
|
||||
break
|
||||
}
|
||||
let formats: [String] = [
|
||||
"MM-dd", "M月d日", "MM月dd日",
|
||||
]
|
||||
var additionalResult: String?
|
||||
for (i, formatString) in formats.enumerated() {
|
||||
formatter.dateFormat = formatString
|
||||
let dateStr = formatter.string(from: dateToDescribe)
|
||||
switch (i == 0, shortened) {
|
||||
case (false, true): formatter.dateFormat = "yy年"
|
||||
case (true, false): formatter.dateFormat = "y-"
|
||||
case (false, false): formatter.dateFormat = "y年"
|
||||
case (true, true): formatter.dateFormat = "yy-"
|
||||
}
|
||||
let yearStr = formatter.string(from: dateToDescribe)
|
||||
if i == 1 {
|
||||
let anotherDateStr = dateStr.convertArabicNumeralsToChinese(onlyDigits: false)
|
||||
let anotherYearStr = yearStr.convertArabicNumeralsToChinese(onlyDigits: true)
|
||||
additionalResult = anotherYearStr + anotherDateStr
|
||||
}
|
||||
let newResult = yearStr + dateStr
|
||||
guard !results.contains(newResult) else { continue }
|
||||
results.append(newResult)
|
||||
}
|
||||
if let additionalResult = additionalResult {
|
||||
results.append(additionalResult)
|
||||
}
|
||||
case let .week(dayDelta, shortened): // 星期
|
||||
applyDelta(for: .day, delta: dayDelta)
|
||||
formatter.dateFormat = shortened ? "EE" : "EEEE"
|
||||
results.append(formatter.string(from: dateToDescribe))
|
||||
case let .year(yearDelta): // 年度
|
||||
applyDelta(for: .year, delta: yearDelta)
|
||||
formatter.dateFormat = "U年"
|
||||
formatter.calendar = .init(identifier: .gregorian)
|
||||
let result = formatter.string(from: dateToDescribe)
|
||||
results.append(result)
|
||||
results.append(result.convertArabicNumeralsToChinese(onlyDigits: true))
|
||||
case let .yearGanzhi(yearDelta): // 幹支(其實嚴格來講「干支」才是錯的)
|
||||
applyDelta(for: .year, delta: yearDelta)
|
||||
formatter.dateFormat = "U年"
|
||||
formatter.calendar = .init(identifier: .chinese)
|
||||
let result = formatter.string(from: dateToDescribe)
|
||||
results.append(result)
|
||||
case let .yearZodiac(yearDelta): // 十二生肖
|
||||
applyDelta(for: .year, delta: yearDelta)
|
||||
formatter.dateFormat = "U"
|
||||
formatter.calendar = .init(identifier: .chinese)
|
||||
let rawKey = formatter.string(from: dateToDescribe)
|
||||
guard let rawResultPair = mapGanzhiToZodiac[rawKey] else { break }
|
||||
let rawResult = isCHS ? rawResultPair.1 : rawResultPair.0
|
||||
results.append(rawResult + "年")
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
}
|
||||
|
||||
/// 註一:天干地支在簡體中文與繁體中文的寫法完全雷同。
|
||||
/// 註二:此處採吐蕃的陰陽五行生肖法、而非突厥五行納音生肖法。
|
||||
private let mapGanzhiToZodiac: [String: (String, String)] = [
|
||||
"甲子": ("木鼠", "木鼠"), "乙丑": ("木牛", "木牛"), "丙寅": ("火虎", "火虎"), "丁卯": ("火兔", "火兔"),
|
||||
"戊辰": ("土龍", "土龙"), "己巳": ("土蛇", "土蛇"), "庚午": ("金馬", "金马"), "辛未": ("金羊", "金羊"),
|
||||
"壬申": ("水猴", "水猴"), "癸酉": ("水雞", "水鸡"), "甲戌": ("木狗", "木狗"), "乙亥": ("木豬", "木猪"),
|
||||
"丙子": ("火鼠", "火鼠"), "丁丑": ("火牛", "火牛"), "戊寅": ("土虎", "土虎"), "己卯": ("土兔", "土兔"),
|
||||
"庚辰": ("金龍", "金龙"), "辛巳": ("金蛇", "金蛇"), "壬午": ("水馬", "水马"), "癸未": ("水羊", "水羊"),
|
||||
"甲申": ("木猴", "木猴"), "乙酉": ("木雞", "木鸡"), "丙戌": ("火狗", "火狗"), "丁亥": ("火豬", "火猪"),
|
||||
"戊子": ("土鼠", "土鼠"), "己丑": ("土牛", "土牛"), "庚寅": ("金虎", "金虎"), "辛卯": ("金兔", "金兔"),
|
||||
"壬辰": ("水龍", "水龙"), "癸巳": ("水蛇", "水蛇"), "甲午": ("木馬", "木马"), "乙未": ("木羊", "木羊"),
|
||||
"丙申": ("火猴", "火猴"), "丁酉": ("火雞", "火鸡"), "戊戌": ("土狗", "土狗"), "己亥": ("土豬", "土猪"),
|
||||
"庚子": ("金鼠", "金鼠"), "辛丑": ("金牛", "金牛"), "壬寅": ("水虎", "水虎"), "癸卯": ("水兔", "水兔"),
|
||||
"甲辰": ("木龍", "木龙"), "乙巳": ("木蛇", "木蛇"), "丙午": ("火馬", "火马"), "丁未": ("火羊", "火羊"),
|
||||
"戊申": ("土猴", "土猴"), "己酉": ("土雞", "土鸡"), "庚戌": ("金狗", "金狗"), "辛亥": ("金豬", "金猪"),
|
||||
"壬子": ("水鼠", "水鼠"), "癸丑": ("水牛", "水牛"), "甲寅": ("木虎", "木虎"), "乙卯": ("木兔", "木兔"),
|
||||
"丙辰": ("火龍", "火龙"), "丁巳": ("火蛇", "火蛇"), "戊午": ("土馬", "土马"), "己未": ("土羊", "土羊"),
|
||||
"庚申": ("金猴", "金猴"), "辛酉": ("金雞", "金鸡"), "壬戌": ("水狗", "水狗"), "癸亥": ("水豬", "水猪"),
|
||||
]
|
||||
|
||||
// MARK: - Date Time Language Conversion Extension
|
||||
|
||||
private let tableMappingArabicDatesToChinese: [String: String] = {
|
||||
let formatter = NumberFormatter()
|
||||
formatter.locale = Locale(identifier: "zh-Hant-TW") // 預設是英文,設定為中文。繁簡一致。
|
||||
formatter.numberStyle = .spellOut
|
||||
var result = [String: String]()
|
||||
for i in 0 ... 60 {
|
||||
result[i.description] = formatter.string(from: NSNumber(value: i))
|
||||
}
|
||||
return result
|
||||
}()
|
||||
|
||||
private extension String {
|
||||
/// 將給定的字串當中的阿拉伯數字轉為漢語小寫,逐字轉換。
|
||||
/// - Parameter target: 要進行轉換操作的對象,會直接修改該對象。
|
||||
func convertArabicNumeralsToChinese(onlyDigits: Bool) -> String {
|
||||
var target = self
|
||||
let sortedKeys = tableMappingArabicDatesToChinese.keys.sorted { $0.count > $1.count }
|
||||
for key in sortedKeys {
|
||||
if onlyDigits, key.count > 1 { continue }
|
||||
guard let result = tableMappingArabicDatesToChinese[key] else { continue }
|
||||
target = target.replacingOccurrences(of: key, with: result)
|
||||
}
|
||||
return target
|
||||
}
|
||||
}
|
|
@ -343,14 +343,26 @@ public extension vChewingLM {
|
|||
}
|
||||
}
|
||||
|
||||
// 新增與日期、時間、星期有關的單元圖資料
|
||||
// 分析且處理可能存在的 InputToken。
|
||||
rawAllUnigrams = rawAllUnigrams.map { unigram in
|
||||
let convertedValues = unigram.value.parseAsInputToken(isCHS: isCHS)
|
||||
guard !convertedValues.isEmpty else { return [unigram] }
|
||||
var result = [Megrez.Unigram]()
|
||||
convertedValues.enumerated().forEach { absDelta, value in
|
||||
let newScore: Double = -80 - Double(absDelta) * 0.01
|
||||
result.append(.init(value: value, score: newScore))
|
||||
}
|
||||
return result
|
||||
}.flatMap { $0 }
|
||||
|
||||
// 新增與日期、時間、星期有關的單元圖資料。
|
||||
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
|
||||
|
||||
if keyChain == "_punctuation_list" {
|
||||
rawAllUnigrams.append(contentsOf: getHaninSymbolMenuUnigrams())
|
||||
}
|
||||
|
||||
// 提前處理語彙置換
|
||||
// 提前處理語彙置換。
|
||||
if isPhraseReplacementEnabled {
|
||||
for i in 0 ..< rawAllUnigrams.count {
|
||||
let newValue = lmReplacements.valuesFor(key: rawAllUnigrams[i].value)
|
||||
|
|
|
@ -13,85 +13,70 @@ import Megrez
|
|||
|
||||
extension vChewingLM.LMInstantiator {
|
||||
func queryDateTimeUnigrams(with key: String = "") -> [Megrez.Unigram] {
|
||||
if !["ㄖˋ-ㄑㄧ", "ㄖˋ-ㄑㄧˊ", "ㄕˊ-ㄐㄧㄢ", "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ"].contains(key) { return .init() }
|
||||
guard let tokenTrigger = TokenTrigger(rawValue: key) else { return [] }
|
||||
var results = [Megrez.Unigram]()
|
||||
let theLocale = Locale(identifier: "zh-Hant")
|
||||
let currentDate = Date()
|
||||
var delta = DateComponents()
|
||||
let thisYear = Calendar.current.dateComponents([.year], from: currentDate).year ?? 2018
|
||||
delta.year = max(min(deltaOfCalendarYears, 0), thisYear * -1)
|
||||
let currentDateShortened = Calendar.current.date(byAdding: delta, to: currentDate)
|
||||
switch key {
|
||||
case "ㄖˋ-ㄑㄧ", "ㄖˋ-ㄑㄧˊ":
|
||||
let formatterDate1 = DateFormatter()
|
||||
let formatterDate2 = DateFormatter()
|
||||
formatterDate1.dateFormat = "yyyy-MM-dd"
|
||||
formatterDate2.dateFormat = "yyyy年MM月dd日"
|
||||
let date1 = formatterDate1.string(from: currentDate)
|
||||
let date2 = formatterDate2.string(from: currentDate)
|
||||
var date3 = date2.convertArabicNumeralsToChinese
|
||||
date3 = date3.replacingOccurrences(of: "年〇", with: "年")
|
||||
date3 = date3.replacingOccurrences(of: "月〇", with: "月")
|
||||
results.append(.init(value: date1, score: -94))
|
||||
results.append(.init(value: date2, score: -95))
|
||||
results.append(.init(value: date3, score: -96))
|
||||
if let currentDateShortened = currentDateShortened, delta.year != 0 {
|
||||
var dateAlt1: String = formatterDate1.string(from: currentDateShortened)
|
||||
dateAlt1.regReplace(pattern: #"^0+"#)
|
||||
var dateAlt2: String = formatterDate2.string(from: currentDateShortened)
|
||||
dateAlt2.regReplace(pattern: #"^0+"#)
|
||||
var dateAlt3 = dateAlt2.convertArabicNumeralsToChinese
|
||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年")
|
||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月")
|
||||
results.append(.init(value: dateAlt1, score: -97))
|
||||
results.append(.init(value: dateAlt2, score: -98))
|
||||
results.append(.init(value: dateAlt3, score: -99))
|
||||
}
|
||||
case "ㄕˊ-ㄐㄧㄢ":
|
||||
let formatterTime1 = DateFormatter()
|
||||
let formatterTime2 = DateFormatter()
|
||||
let formatterTime3 = DateFormatter()
|
||||
formatterTime1.dateFormat = "HH:mm"
|
||||
formatterTime2.dateFormat = isCHS ? "HH点mm分" : "HH點mm分"
|
||||
formatterTime3.dateFormat = isCHS ? "HH时mm分" : "HH時mm分"
|
||||
let time1 = formatterTime1.string(from: currentDate)
|
||||
let time2 = formatterTime2.string(from: currentDate)
|
||||
let time3 = formatterTime3.string(from: currentDate)
|
||||
results.append(.init(value: time1, score: -97))
|
||||
results.append(.init(value: time2, score: -98))
|
||||
results.append(.init(value: time3, score: -99))
|
||||
case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ":
|
||||
let formatterWeek1 = DateFormatter()
|
||||
let formatterWeek2 = DateFormatter()
|
||||
formatterWeek1.dateFormat = "EEEE"
|
||||
formatterWeek2.dateFormat = "EE"
|
||||
formatterWeek1.locale = theLocale
|
||||
formatterWeek2.locale = theLocale
|
||||
let week1 = formatterWeek1.string(from: currentDate)
|
||||
let week2 = formatterWeek2.string(from: currentDate)
|
||||
results.append(.init(value: week1, score: -98))
|
||||
results.append(.init(value: week2, score: -99))
|
||||
default: return .init()
|
||||
var tokens: [String] = []
|
||||
|
||||
func processDateWithDayDelta(_ delta: Int) {
|
||||
tokens = ["MACRO@DATE_DAYDELTA:\(delta)"]
|
||||
if deltaOfCalendarYears != 0 { tokens.append("MACRO@DATE_DAYDELTA:\(delta)_YEARDELTA:\(deltaOfCalendarYears)") }
|
||||
tokens.append("MACRO@DATE_DAYDELTA:\(delta)_SHORTENED")
|
||||
tokens.append("MACRO@DATE_DAYDELTA:\(delta)_LUNA")
|
||||
}
|
||||
|
||||
func processYearWithYearDelta(_ delta: Int) {
|
||||
tokens = ["MACRO@YEAR_YEARDELTA:\(delta)"]
|
||||
if deltaOfCalendarYears != 0 { tokens.append("MACRO@YEAR_YEARDELTA:\(delta + deltaOfCalendarYears)") }
|
||||
tokens.append("MACRO@YEAR_GANZHI_YEARDELTA:\(delta)")
|
||||
tokens.append("MACRO@YEAR_ZODIAC_YEARDELTA:\(delta)")
|
||||
}
|
||||
|
||||
switch tokenTrigger {
|
||||
case .jin1tian1ri4qi2, .jin1tian1ri4qi1: processDateWithDayDelta(0) // 今天日期
|
||||
case .zuo2tian1ri4qi2, .zuo2tian1ri4qi1: processDateWithDayDelta(-1) // 昨天日期
|
||||
case .qian2tian1ri4qi2, .qian2tian1ri4qi1: processDateWithDayDelta(-2) // 前天日期
|
||||
case .ming2tian1ri4qi2, .ming2tian1ri4qi1: processDateWithDayDelta(1) // 明天日期
|
||||
case .hou4tian1ri4qi1, .hou4tian1ri4qi2: processDateWithDayDelta(2) // 後天日期
|
||||
case .jin1nian2nian2du4: processYearWithYearDelta(0) // 今年年度
|
||||
case .qu4nian2nian2du4: processYearWithYearDelta(-1) // 去年年度
|
||||
case .qian2nian2nian2du4: processYearWithYearDelta(-2) // 前年年度
|
||||
case .ming2nian2nian2du4: processYearWithYearDelta(1) // 明年年度
|
||||
case .hou4nian2nian2du4: processYearWithYearDelta(2) // 後年年度
|
||||
case .shi2jian1: tokens = ["MACRO@TIME_SHORTENED"] // 時間
|
||||
case .xing1qi1, .xing1qi2: tokens = ["MACRO@WEEK_SHORTENED", "MACRO@WEEK"] // 星期
|
||||
case .suo3zai4shi2qu1, .dang1qian2shi2qu1, .mu4qian2shi2qu1: tokens = ["MACRO@TIMEZONE", "MACRO@TIMEZONE_SHORTENED"] // 時區
|
||||
}
|
||||
// 終末處理。
|
||||
let values = tokens.map { $0.parseAsInputToken(isCHS: isCHS) }.flatMap { $0 }.deduplicated
|
||||
var i: Double = -99
|
||||
for strValue in values.reversed() {
|
||||
results.insert(.init(value: strValue, score: i), at: 0)
|
||||
i += 1
|
||||
}
|
||||
return results
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Date Time Language Conversion Extension
|
||||
|
||||
private let tableMappingArabicNumeralsToChinese: [String: String] = [
|
||||
"0": "〇", "1": "一", "2": "二", "3": "三", "4": "四", "5": "五", "6": "六", "7": "七", "8": "八", "9": "九",
|
||||
]
|
||||
|
||||
private extension String {
|
||||
/// 將給定的字串當中的阿拉伯數字轉為漢語小寫,逐字轉換。
|
||||
/// - Parameter target: 要進行轉換操作的對象,會直接修改該對象。
|
||||
var convertArabicNumeralsToChinese: String {
|
||||
var target = self
|
||||
for key in tableMappingArabicNumeralsToChinese.keys {
|
||||
guard let result = tableMappingArabicNumeralsToChinese[key] else { continue }
|
||||
target = target.replacingOccurrences(of: key, with: result)
|
||||
}
|
||||
return target
|
||||
}
|
||||
private enum TokenTrigger: String {
|
||||
case shi2jian1 = "ㄕˊ-ㄐㄧㄢ"
|
||||
case xing1qi1 = "ㄒㄧㄥ-ㄑㄧ"
|
||||
case xing1qi2 = "ㄒㄧㄥ-ㄑㄧˊ"
|
||||
case jin1nian2nian2du4 = "ㄐㄧㄣ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||
case qu4nian2nian2du4 = "ㄑㄩˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||
case ming2nian2nian2du4 = "ㄇㄧㄥˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||
case qian2nian2nian2du4 = "ㄑㄧㄢˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||
case hou4nian2nian2du4 = "ㄏㄡˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||
case jin1tian1ri4qi2 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||
case ming2tian1ri4qi2 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||
case zuo2tian1ri4qi2 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||
case qian2tian1ri4qi2 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||
case hou4tian1ri4qi2 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||
case jin1tian1ri4qi1 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||
case ming2tian1ri4qi1 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||
case zuo2tian1ri4qi1 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||
case qian2tian1ri4qi1 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||
case hou4tian1ri4qi1 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||
case dang1qian2shi2qu1 = "ㄉㄤ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
|
||||
case mu4qian2shi2qu1 = "ㄇㄨˋ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
|
||||
case suo3zai4shi2qu1 = "ㄙㄨㄛˇ-ㄗㄞˋ-ㄕˊ-ㄑㄩ"
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
//// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// ====================
|
||||
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
|
||||
// ... with NTL restriction stating that:
|
||||
// No trademark license is granted to use the trade names, trademarks, service
|
||||
// marks, or product names of Contributor, except as required to fulfill notice
|
||||
// requirements defined in MIT License.
|
||||
|
||||
import Foundation
|
||||
import XCTest
|
||||
|
||||
@testable import LangModelAssembly
|
||||
|
||||
final class InputTokenTests: XCTestCase {
|
||||
func testTranslatingTokens_1_TimeZone() throws {
|
||||
print("測試時區俗稱:" + "MACRO@TIMEZONE_SHORTENED".parseAsInputToken(isCHS: false).description)
|
||||
print("測試時區全稱:" + "MACRO@TIMEZONE".parseAsInputToken(isCHS: false).description)
|
||||
}
|
||||
|
||||
func testTranslatingTokens_2_TimeNow() throws {
|
||||
print("測試時間時分:" + "MACRO@TIME_SHORTENED".parseAsInputToken(isCHS: false).description)
|
||||
print("測試帶秒時間:" + "MACRO@TIME".parseAsInputToken(isCHS: true).description)
|
||||
}
|
||||
|
||||
func testTranslatingTokens_3_Date() throws {
|
||||
print("測試農曆:" + "MACRO@DATE_LUNA".parseAsInputToken(isCHS: true).description)
|
||||
print("測試二戰勝利紀年:" + "MACRO@DATE_YEARDELTA:-1945".parseAsInputToken(isCHS: true).description)
|
||||
print("測試短日期之135天前:" + "MACRO@DATE_DAYDELTA:-135_SHORTENED".parseAsInputToken(isCHS: true).description)
|
||||
print("測試長日期之135天前:" + "MACRO@DATE_DAYDELTA:-135".parseAsInputToken(isCHS: true).description)
|
||||
print("測試短日期之今天:" + "MACRO@DATE_SHORTENED".parseAsInputToken(isCHS: true).description)
|
||||
print("測試長日期之今天:" + "MACRO@DATE".parseAsInputToken(isCHS: true).description)
|
||||
print("測試短日期之明天:" + "MACRO@DATE_SHORTENED_DAYDELTA:1".parseAsInputToken(isCHS: true).description)
|
||||
print("測試長日期之明天:" + "MACRO@DATE_DAYDELTA:1".parseAsInputToken(isCHS: true).description)
|
||||
print("測試短日期之明年:" + "MACRO@DATE_SHORTENED_YEARDELTA:1".parseAsInputToken(isCHS: true).description)
|
||||
print("測試長日期之明年:" + "MACRO@DATE_YEARDELTA:1".parseAsInputToken(isCHS: true).description)
|
||||
}
|
||||
|
||||
func testTranslatingTokens_4_Week() throws {
|
||||
print("測試今天星期幾:" + "MACRO@WEEK".parseAsInputToken(isCHS: false).description)
|
||||
print("測試今天週幾:" + "MACRO@WEEK_SHORTENED".parseAsInputToken(isCHS: false).description)
|
||||
print("測試明天星期幾:" + "MACRO@WEEK_DAYDELTA:1".parseAsInputToken(isCHS: false).description)
|
||||
print("測試明天週幾:" + "MACRO@WEEK_SHORTENED_DAYDELTA:1".parseAsInputToken(isCHS: false).description)
|
||||
print("測試後天星期幾:" + "MACRO@WEEK_DAYDELTA:+2".parseAsInputToken(isCHS: false).description)
|
||||
print("測試後天週幾:" + "MACRO@WEEK_SHORTENED_DAYDELTA:+2".parseAsInputToken(isCHS: false).description)
|
||||
}
|
||||
|
||||
func testTranslatingTokens_5_Year() throws {
|
||||
print("測試今年:" + "MACRO@YEAR".parseAsInputToken(isCHS: false).description)
|
||||
print("測試今年干支:" + "MACRO@YEAR_GANZHI".parseAsInputToken(isCHS: false).description)
|
||||
print("測試今年生肖:" + "MACRO@YEAR_ZODIAC".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以前:" + "MACRO@YEAR_YEARDELTA:-1000".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以前干支:" + "MACRO@YEAR_GANZHI_YEARDELTA:-1000".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以前生肖:" + "MACRO@YEAR_ZODIAC_YEARDELTA:-1000".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以後:" + "MACRO@YEAR_YEARDELTA:1000".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以後干支:" + "MACRO@YEAR_GANZHI_YEARDELTA:1000".parseAsInputToken(isCHS: false).description)
|
||||
print("測試一千年以後生肖:" + "MACRO@YEAR_ZODIAC_YEARDELTA:1000".parseAsInputToken(isCHS: false).description)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue