From c5ce9199bd12d729cc6ad93fe039e7f630616845 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 14 Dec 2023 23:17:13 +0800 Subject: [PATCH] LMAssembly // Implementing InputToken support. --- .../LangModelAssembly/InputToken.swift | 247 ++++++++++++++++++ .../LangModelAssembly/LMInstantiator.swift | 16 +- .../LMInstantiator_DateTimeExtension.swift | 135 +++++----- .../InputTokenTests.swift | 58 ++++ 4 files changed, 379 insertions(+), 77 deletions(-) create mode 100644 Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/InputToken.swift create mode 100644 Packages/vChewing_LangModelAssembly/Tests/LangModelAssemblyTests/InputTokenTests.swift diff --git a/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/InputToken.swift b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/InputToken.swift new file mode 100644 index 00000000..3838395f --- /dev/null +++ b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/InputToken.swift @@ -0,0 +1,247 @@ +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) +// ... with NTL restriction stating that: +// No trademark license is granted to use the trade names, trademarks, service +// marks, or product names of Contributor, except as required to fulfill notice +// requirements defined in MIT License. 
import Foundation

/// How it works: `InputToken.parse(from:)` analyses a raw macro string and yields the
/// matching tokens; each token then renders itself into candidate strings through
/// `translated(isCHS:)`.
public enum InputToken {
  /// Name of the current time zone; `shortened` selects the short generic name.
  case timeZone(shortened: Bool)
  /// Current time of day; `shortened` leaves out the seconds.
  case timeNow(shortened: Bool)
  /// A date shifted by the given deltas; `luna` renders it with the Chinese calendar.
  case date(dayDelta: Int = 0, yearDelta: Int = 0, shortened: Bool = true, luna: Bool = false)
  /// Day of the week, shifted by `dayDelta` days.
  case week(dayDelta: Int = 0, shortened: Bool = true)
  /// Gregorian year, shifted by `yearDelta` years.
  case year(yearDelta: Int = 0)
  /// Stem-branch (sexagenary cycle) name of the year.
  case yearGanzhi(yearDelta: Int = 0)
  /// Element-plus-animal zodiac name of the year.
  case yearZodiac(yearDelta: Int = 0)
}

// MARK: - Public-facing API.

public extension String {
  /// Treats the string as a macro token and returns every rendering of it.
  /// - Parameter isCHS: `true` for Simplified Chinese output, `false` for Traditional Chinese.
  /// - Returns: All candidate strings, passed through the `deduplicated` helper (defined
  ///   outside this file); empty when the string is not a recognised macro.
  func parseAsInputToken(isCHS: Bool) -> [String] {
    InputToken.parse(from: self).flatMap { $0.translated(isCHS: isCHS) }.deduplicated
  }
}

// MARK: - Parser parsing raw token value to construct token.

public extension InputToken {
  /// Parses a raw macro such as `MACRO@DATE_DAYDELTA:-1_SHORTENED` into tokens.
  ///
  /// After the mandatory `MACRO@` prefix the text is split on `_`. The first component
  /// names the token kind, the others are either flags (`SHORTENED`, `LUNA`, `GANZHI`,
  /// `ZODIAC`) or `NAME:integer` parameters (`DAYDELTA`, `YEARDELTA`). Components are
  /// matched case-insensitively.
  /// - Parameter rawToken: The raw macro string.
  /// - Returns: The parsed tokens; empty when the prefix is missing or the kind is unknown.
  static func parse(from rawToken: String) -> [InputToken] {
    let macroPrefix = "MACRO@"
    guard rawToken.hasPrefix(macroPrefix) else { return [] }
    let components = rawToken.dropFirst(macroPrefix.count).split(separator: "_").map { $0.uppercased() }
    guard let kind = components.first else { return [] }

    // Collect `NAME:integer` parameters. The names come from the upper-cased components,
    // so parameters are matched case-insensitively, exactly like the flags below.
    var params: [String: Int] = [:]
    for component in components {
      let pair = component.split(separator: ":")
      guard pair.count == 2, let value = Int(pair[1]) else { continue }
      params[String(pair[0])] = value
    }

    let dayDelta = params["DAYDELTA"] ?? 0
    let yearDelta = params["YEARDELTA"] ?? 0
    let shortened = components.contains("SHORTENED")
    let hasZodiac = components.contains("ZODIAC")
    let hasGanzhi = components.contains("GANZHI")
    let hasLuna = components.contains("LUNA")

    switch kind {
    case "TIMEZONE":
      return [.timeZone(shortened: shortened)]
    case "TIME":
      return [.timeNow(shortened: shortened)]
    case "DATE":
      return [.date(dayDelta: dayDelta, yearDelta: yearDelta, shortened: shortened, luna: hasLuna)]
    case "WEEK":
      return [.week(dayDelta: dayDelta, shortened: shortened)]
    case "YEAR":
      // The Gregorian year is always emitted first so that it can serve as a reference.
      var tokens: [InputToken] = [.year(yearDelta: yearDelta)]
      if hasZodiac { tokens.append(.yearZodiac(yearDelta: yearDelta)) }
      if hasGanzhi { tokens.append(.yearGanzhi(yearDelta: yearDelta)) }
      return tokens
    default:
      return []
    }
  }
}

// MARK: - Parser parsing token itself.

public extension InputToken {
  /// Renders the token into candidate strings, evaluated against the current date and time.
  /// - Parameter isCHS: `true` for Simplified Chinese output, `false` for Traditional Chinese.
  /// - Returns: The candidate strings, in order of insertion.
  func translated(isCHS: Bool) -> [String] {
    let locale = Locale(identifier: isCHS ? "zh-Hans" : "zh-Hant-TW")
    let formatter = DateFormatter()
    formatter.locale = locale
    let calendar = Calendar.current
    let currentDate = Date()
    var dateToDescribe = currentDate // Adjusted below according to the token's deltas.
    var results: [String] = []

    /// Shifts `dateToDescribe` by whole years. The delta is clamped so that the
    /// resulting year number never drops below zero.
    func shiftYears(by deltaValue: Int) {
      let thisYear = calendar.component(.year, from: currentDate)
      let clamped = max(deltaValue, -thisYear)
      dateToDescribe = calendar.date(byAdding: .year, value: clamped, to: dateToDescribe) ?? dateToDescribe
    }

    /// Shifts `dateToDescribe` by whole calendar days. Calendar arithmetic is used
    /// instead of adding 86 400 seconds per day, because days that contain a
    /// daylight-saving transition are not 24 hours long.
    func shiftDays(by deltaValue: Int) {
      dateToDescribe = calendar.date(byAdding: .day, value: deltaValue, to: dateToDescribe) ?? dateToDescribe
    }

    /// Formats `dateToDescribe` with the given date-format pattern.
    func render(_ format: String) -> String {
      formatter.dateFormat = format
      return formatter.string(from: dateToDescribe)
    }

    switch self {
    case let .timeZone(shortened): // Time zone.
      let zone = TimeZone.current
      let style: TimeZone.NameStyle = shortened ? .shortGeneric : .standard
      results.append(zone.localizedName(for: style, locale: locale) ?? zone.description)

    case let .timeNow(shortened): // Current time.
      let formats: [String]
      switch (isCHS, shortened) {
      case (false, true): formats = ["HH:mm", "HH點mm分", "HH時mm分"]
      case (false, false): formats = ["HH:mm:ss", "HH點mm分ss秒", "HH時mm分ss秒"]
      case (true, true): formats = ["HH:mm", "HH点mm分", "HH时mm分"]
      case (true, false): formats = ["HH:mm:ss", "HH点mm分ss秒", "HH时mm分ss秒"]
      }
      results.append(contentsOf: formats.map(render))
      // Spelled-out variants of every rendering that is not colon-separated.
      let spelledOut: [String] = results.compactMap { candidate in
        guard !candidate.contains(":") else { return nil }
        var text = candidate
        if text.first == "0" { text = String(text.dropFirst()) }
        if text.hasPrefix("2点") || text.hasPrefix("2點") {
          text = (isCHS ? "两点" : "兩點") + String(text.dropFirst(2))
        }
        return text
          .convertArabicNumeralsToChinese(onlyDigits: false)
          .replacingOccurrences(of: "〇", with: "零")
      }
      results.append(contentsOf: spelledOut)

    case let .date(dayDelta, yearDelta, shortened, hasLuna): // Date.
      shiftYears(by: yearDelta)
      shiftDays(by: dayDelta)
      if hasLuna {
        // Chinese-calendar dates are handled separately.
        formatter.calendar = Calendar(identifier: .chinese)
        formatter.dateStyle = .medium
        let monthAndDay = render("MMMd")
        let yearGanzhi = render("U")
        results.append("\(yearGanzhi)年\(monthAndDay)")
        if let yearZodiac = mapGanzhiToZodiac[yearGanzhi] {
          results.append("\(isCHS ? yearZodiac.1 : yearZodiac.0)年\(monthAndDay)")
        }
      } else {
        let dayFormats = ["MM-dd", "M月d日", "MM月dd日"]
        var spelledOut: String?
        for (index, dayFormat) in dayFormats.enumerated() {
          let dayText = render(dayFormat)
          // The first (dash-separated) layout gets a dash-terminated year; the others use "年".
          let yearText = render((shortened ? "yy" : "y") + (index == 0 ? "-" : "年"))
          if index == 1 {
            spelledOut = yearText.convertArabicNumeralsToChinese(onlyDigits: true)
              + dayText.convertArabicNumeralsToChinese(onlyDigits: false)
          }
          let combined = yearText + dayText
          if !results.contains(combined) { results.append(combined) }
        }
        if let spelledOut = spelledOut { results.append(spelledOut) }
      }

    case let .week(dayDelta, shortened): // Day of the week.
      shiftDays(by: dayDelta)
      results.append(render(shortened ? "EE" : "EEEE"))

    case let .year(yearDelta): // Gregorian year.
      shiftYears(by: yearDelta)
      formatter.calendar = Calendar(identifier: .gregorian)
      // "y" is the numeric year field; the cyclic-year field "U" only yields a number
      // for this calendar through a fallback.
      let yearText = render("y年")
      results.append(yearText)
      results.append(yearText.convertArabicNumeralsToChinese(onlyDigits: true))

    case let .yearGanzhi(yearDelta): // Stem-branch year name.
      shiftYears(by: yearDelta)
      formatter.calendar = Calendar(identifier: .chinese)
      results.append(render("U年"))

    case let .yearZodiac(yearDelta): // Zodiac year name.
      shiftYears(by: yearDelta)
      formatter.calendar = Calendar(identifier: .chinese)
      if let pair = mapGanzhiToZodiac[render("U")] {
        results.append((isCHS ? pair.1 : pair.0) + "年")
      }
    }

    return results
  }
}

/// Maps each of the 60 stem-branch year names to its zodiac name as a
/// `(Traditional Chinese, Simplified Chinese)` pair.
///
/// Note 1: stems and branches are written identically in Simplified and Traditional Chinese.
/// Note 2: this follows the Tibetan yin-yang five-element zodiac scheme, not the
/// Turkic five-element Nayin scheme.
private let mapGanzhiToZodiac: [String: (String, String)] = {
  let stems = Array("甲乙丙丁戊己庚辛壬癸")
  let branches = Array("子丑寅卯辰巳午未申酉戌亥")
  let elements = Array("木火土金水") // One element per consecutive pair of stems.
  let animalsCHT = Array("鼠牛虎兔龍蛇馬羊猴雞狗豬")
  let animalsCHS = Array("鼠牛虎兔龙蛇马羊猴鸡狗猪")
  var table: [String: (String, String)] = [:]
  for index in 0 ..< 60 {
    let stemIndex = index % stems.count
    let branchIndex = index % branches.count
    let element = elements[stemIndex / 2]
    let key = String([stems[stemIndex], branches[branchIndex]])
    table[key] = ("\(element)\(animalsCHT[branchIndex])", "\(element)\(animalsCHS[branchIndex])")
  }
  return table
}()

// MARK: - Date Time Language Conversion Extension

/// Replacement pairs for the numbers 0 through 60, ordered from 60 down to 0 so that
/// two-digit numbers are always replaced before single digits, in a fixed order.
private let arabicToChineseReplacements: [(arabic: String, chinese: String)] = {
  let formatter = NumberFormatter()
  // The spell-out style follows the formatter's locale, so a Chinese locale is set
  // explicitly. The original author notes the output is the same for both scripts.
  formatter.locale = Locale(identifier: "zh-Hant-TW")
  formatter.numberStyle = .spellOut
  return (0 ... 60).reversed().compactMap { number in
    formatter.string(from: NSNumber(value: number)).map { (arabic: number.description, chinese: $0) }
  }
}()

private extension String {
  /// Returns a copy of the string with Arabic numerals replaced by Chinese numerals.
  /// - Parameter onlyDigits: When `true`, every digit is replaced on its own;
  ///   when `false`, two-digit numbers up to 60 are replaced as whole numbers first.
  /// - Returns: The converted string; the receiver is not modified.
  func convertArabicNumeralsToChinese(onlyDigits: Bool) -> String {
    var target = self
    for pair in arabicToChineseReplacements where !(onlyDigits && pair.arabic.count > 1) {
      target = target.replacingOccurrences(of: pair.arabic, with: pair.chinese)
    }
    return target
  }
}
import Megrez

extension vChewingLM.LMInstantiator {
  /// Produces the date, year, time, weekday and time-zone unigrams for a reading key.
  ///
  /// The key is matched against `TokenTrigger`. A match is expanded into one or more
  /// `MACRO@…` strings, and `parseAsInputToken(isCHS:)` turns those into candidate texts.
  /// - Parameter key: The reading key to look up.
  /// - Returns: One unigram per candidate, in macro order, scored so that the last
  ///   candidate gets -99 and each earlier one scores 1 higher. Empty when the key
  ///   is not a trigger.
  func queryDateTimeUnigrams(with key: String = "") -> [Megrez.Unigram] {
    guard let trigger = TokenTrigger(rawValue: key) else { return [] }

    /// Macros for a date that is `dayDelta` days away from today.
    func dateMacros(dayDelta: Int) -> [String] {
      var macros = ["MACRO@DATE_DAYDELTA:\(dayDelta)"]
      if deltaOfCalendarYears != 0 {
        macros.append("MACRO@DATE_DAYDELTA:\(dayDelta)_YEARDELTA:\(deltaOfCalendarYears)")
      }
      macros.append("MACRO@DATE_DAYDELTA:\(dayDelta)_SHORTENED")
      macros.append("MACRO@DATE_DAYDELTA:\(dayDelta)_LUNA")
      return macros
    }

    /// Macros for a year that is `yearDelta` years away from this year.
    func yearMacros(yearDelta: Int) -> [String] {
      var macros = ["MACRO@YEAR_YEARDELTA:\(yearDelta)"]
      if deltaOfCalendarYears != 0 {
        macros.append("MACRO@YEAR_YEARDELTA:\(yearDelta + deltaOfCalendarYears)")
      }
      macros.append("MACRO@YEAR_GANZHI_YEARDELTA:\(yearDelta)")
      macros.append("MACRO@YEAR_ZODIAC_YEARDELTA:\(yearDelta)")
      return macros
    }

    let macros: [String]
    switch trigger {
    // Dates.
    case .jin1tian1ri4qi1, .jin1tian1ri4qi2: macros = dateMacros(dayDelta: 0) // Today.
    case .zuo2tian1ri4qi1, .zuo2tian1ri4qi2: macros = dateMacros(dayDelta: -1) // Yesterday.
    case .qian2tian1ri4qi1, .qian2tian1ri4qi2: macros = dateMacros(dayDelta: -2) // Two days ago.
    case .ming2tian1ri4qi1, .ming2tian1ri4qi2: macros = dateMacros(dayDelta: 1) // Tomorrow.
    case .hou4tian1ri4qi1, .hou4tian1ri4qi2: macros = dateMacros(dayDelta: 2) // In two days.
    // Years.
    case .jin1nian2nian2du4: macros = yearMacros(yearDelta: 0) // This year.
    case .qu4nian2nian2du4: macros = yearMacros(yearDelta: -1) // Last year.
    case .qian2nian2nian2du4: macros = yearMacros(yearDelta: -2) // Two years ago.
    case .ming2nian2nian2du4: macros = yearMacros(yearDelta: 1) // Next year.
    case .hou4nian2nian2du4: macros = yearMacros(yearDelta: 2) // In two years.
    // Time, weekday, time zone.
    case .shi2jian1:
      macros = ["MACRO@TIME_SHORTENED"]
    case .xing1qi1, .xing1qi2:
      macros = ["MACRO@WEEK_SHORTENED", "MACRO@WEEK"]
    case .suo3zai4shi2qu1, .dang1qian2shi2qu1, .mu4qian2shi2qu1:
      macros = ["MACRO@TIMEZONE", "MACRO@TIMEZONE_SHORTENED"]
    }

    // Final step: render every macro and assign descending scores that end at -99.
    let values = macros.flatMap { $0.parseAsInputToken(isCHS: isCHS) }.deduplicated
    let lastIndex = values.count - 1
    return values.enumerated().map { index, value in
      Megrez.Unigram(value: value, score: -99 + Double(lastIndex - index))
    }
  }
}

/// Reading keys that trigger date/time macro expansion. Each raw value is the
/// reading key itself.
private enum TokenTrigger: String {
  // Time and weekday.
  case shi2jian1 = "ㄕˊ-ㄐㄧㄢ"
  case xing1qi1 = "ㄒㄧㄥ-ㄑㄧ"
  case xing1qi2 = "ㄒㄧㄥ-ㄑㄧˊ"
  // Years.
  case jin1nian2nian2du4 = "ㄐㄧㄣ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
  case qu4nian2nian2du4 = "ㄑㄩˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
  case ming2nian2nian2du4 = "ㄇㄧㄥˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
  case qian2nian2nian2du4 = "ㄑㄧㄢˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
  case hou4nian2nian2du4 = "ㄏㄡˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
  // Dates, with the last syllable in the second tone.
  case jin1tian1ri4qi2 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
  case ming2tian1ri4qi2 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
  case zuo2tian1ri4qi2 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
  case qian2tian1ri4qi2 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
  case hou4tian1ri4qi2 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
  // Dates, with the last syllable in the first tone.
  case jin1tian1ri4qi1 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
  case ming2tian1ri4qi1 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
  case zuo2tian1ri4qi1 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
  case qian2tian1ri4qi1 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
  case hou4tian1ri4qi1 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
  // Time zone.
  case dang1qian2shi2qu1 = "ㄉㄤ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
  case mu4qian2shi2qu1 = "ㄇㄨˋ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
  case suo3zai4shi2qu1 = "ㄙㄨㄛˇ-ㄗㄞˋ-ㄕˊ-ㄑㄩ"
}
import Foundation
import XCTest

@testable import LangModelAssembly

/// Tests for macro parsing and rendering. Rendered text depends on the current
/// date, time and time zone, so the rendering tests print it for inspection and
/// assert only that at least one candidate was produced.
final class InputTokenTests: XCTestCase {
  /// Renders `macro`, prints the candidates after `label`, and fails when there are none.
  private func assertRenders(_ label: String, _ macro: String, isCHS: Bool) {
    let candidates = macro.parseAsInputToken(isCHS: isCHS)
    print(label + candidates.description)
    XCTAssertFalse(candidates.isEmpty, "\(macro) produced no candidates.")
  }

  func testTranslatingTokens_1_TimeZone() throws {
    assertRenders("測試時區俗稱:", "MACRO@TIMEZONE_SHORTENED", isCHS: false)
    assertRenders("測試時區全稱:", "MACRO@TIMEZONE", isCHS: false)
  }

  func testTranslatingTokens_2_TimeNow() throws {
    assertRenders("測試時間時分:", "MACRO@TIME_SHORTENED", isCHS: false)
    assertRenders("測試帶秒時間:", "MACRO@TIME", isCHS: true)
  }

  func testTranslatingTokens_3_Date() throws {
    assertRenders("測試農曆:", "MACRO@DATE_LUNA", isCHS: true)
    assertRenders("測試二戰勝利紀年:", "MACRO@DATE_YEARDELTA:-1945", isCHS: true)
    assertRenders("測試短日期之135天前:", "MACRO@DATE_DAYDELTA:-135_SHORTENED", isCHS: true)
    assertRenders("測試長日期之135天前:", "MACRO@DATE_DAYDELTA:-135", isCHS: true)
    assertRenders("測試短日期之今天:", "MACRO@DATE_SHORTENED", isCHS: true)
    assertRenders("測試長日期之今天:", "MACRO@DATE", isCHS: true)
    assertRenders("測試短日期之明天:", "MACRO@DATE_SHORTENED_DAYDELTA:1", isCHS: true)
    assertRenders("測試長日期之明天:", "MACRO@DATE_DAYDELTA:1", isCHS: true)
    assertRenders("測試短日期之明年:", "MACRO@DATE_SHORTENED_YEARDELTA:1", isCHS: true)
    assertRenders("測試長日期之明年:", "MACRO@DATE_YEARDELTA:1", isCHS: true)
  }

  func testTranslatingTokens_4_Week() throws {
    assertRenders("測試今天星期幾:", "MACRO@WEEK", isCHS: false)
    assertRenders("測試今天週幾:", "MACRO@WEEK_SHORTENED", isCHS: false)
    assertRenders("測試明天星期幾:", "MACRO@WEEK_DAYDELTA:1", isCHS: false)
    assertRenders("測試明天週幾:", "MACRO@WEEK_SHORTENED_DAYDELTA:1", isCHS: false)
    assertRenders("測試後天星期幾:", "MACRO@WEEK_DAYDELTA:+2", isCHS: false)
    assertRenders("測試後天週幾:", "MACRO@WEEK_SHORTENED_DAYDELTA:+2", isCHS: false)
  }

  func testTranslatingTokens_5_Year() throws {
    assertRenders("測試今年:", "MACRO@YEAR", isCHS: false)
    assertRenders("測試今年干支:", "MACRO@YEAR_GANZHI", isCHS: false)
    assertRenders("測試今年生肖:", "MACRO@YEAR_ZODIAC", isCHS: false)
    assertRenders("測試一千年以前:", "MACRO@YEAR_YEARDELTA:-1000", isCHS: false)
    assertRenders("測試一千年以前干支:", "MACRO@YEAR_GANZHI_YEARDELTA:-1000", isCHS: false)
    assertRenders("測試一千年以前生肖:", "MACRO@YEAR_ZODIAC_YEARDELTA:-1000", isCHS: false)
    assertRenders("測試一千年以後:", "MACRO@YEAR_YEARDELTA:1000", isCHS: false)
    assertRenders("測試一千年以後干支:", "MACRO@YEAR_GANZHI_YEARDELTA:1000", isCHS: false)
    assertRenders("測試一千年以後生肖:", "MACRO@YEAR_ZODIAC_YEARDELTA:1000", isCHS: false)
  }

  /// Parsing does not depend on the clock, so its results can be pinned exactly.
  func testParsingTokens() throws {
    let weekTokens = InputToken.parse(from: "MACRO@WEEK_SHORTENED_DAYDELTA:+2")
    XCTAssertEqual(weekTokens.count, 1)
    if case let .week(dayDelta, shortened)? = weekTokens.first {
      XCTAssertEqual(dayDelta, 2)
      XCTAssertTrue(shortened)
    } else {
      XCTFail("Expected a .week token.")
    }

    let dateTokens = InputToken.parse(from: "MACRO@DATE_DAYDELTA:-135_YEARDELTA:1_LUNA")
    XCTAssertEqual(dateTokens.count, 1)
    if case let .date(dayDelta, yearDelta, shortened, luna)? = dateTokens.first {
      XCTAssertEqual(dayDelta, -135)
      XCTAssertEqual(yearDelta, 1)
      XCTAssertFalse(shortened)
      XCTAssertTrue(luna)
    } else {
      XCTFail("Expected a .date token.")
    }

    // A YEAR macro always yields the Gregorian year, plus one token per extra flag.
    XCTAssertEqual(InputToken.parse(from: "MACRO@YEAR").count, 1)
    XCTAssertEqual(InputToken.parse(from: "MACRO@YEAR_GANZHI_ZODIAC").count, 3)
  }

  /// Strings without the macro prefix, or with an unknown kind, yield nothing.
  func testRejectingNonMacros() throws {
    XCTAssertTrue(InputToken.parse(from: "DATE_SHORTENED").isEmpty)
    XCTAssertTrue(InputToken.parse(from: "MACRO@").isEmpty)
    XCTAssertTrue(InputToken.parse(from: "MACRO@UNKNOWN").isEmpty)
    XCTAssertTrue("MACRO@UNKNOWN".parseAsInputToken(isCHS: false).isEmpty)
  }
}