LMAssembly // Implementing InputToken support.
This commit is contained in:
parent
87f7328636
commit
c5ce9199bd
|
@ -0,0 +1,247 @@
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
|
// ====================
|
||||||
|
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
|
||||||
|
// ... with NTL restriction stating that:
|
||||||
|
// No trademark license is granted to use the trade names, trademarks, service
|
||||||
|
// marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
// requirements defined in MIT License.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// A macro token that expands into date / time related candidate strings.
///
/// How it works: `InputToken.parse(from:)` analyses a raw macro string and
/// produces the matching tokens; each token then describes its own expansion
/// through `translated(isCHS:)`.
public enum InputToken {
  /// The current time zone's localized name.
  /// `shortened` selects the short generic name instead of the standard one.
  case timeZone(shortened: Bool)

  /// The current time of day.
  /// `shortened` omits the seconds.
  case timeNow(shortened: Bool)

  /// A calendar date, shifted from today by `yearDelta` years and `dayDelta` days.
  /// `shortened` uses a two-digit year; `luna` renders the date with the
  /// Chinese calendar instead.
  case date(dayDelta: Int = 0, yearDelta: Int = 0, shortened: Bool = true, luna: Bool = false)

  /// The weekday name of today shifted by `dayDelta` days.
  /// `shortened` selects the abbreviated weekday name.
  case week(dayDelta: Int = 0, shortened: Bool = true)

  /// The Gregorian year, shifted from this year by `yearDelta`.
  case year(yearDelta: Int = 0)

  /// The cyclic (stem-branch) year name from the Chinese calendar, shifted by `yearDelta`.
  case yearGanzhi(yearDelta: Int = 0)

  /// The element + zodiac animal name of the year, shifted by `yearDelta`.
  case yearZodiac(yearDelta: Int = 0)
}
|
||||||
|
|
||||||
|
// MARK: - 正式對外投入使用的 API。
|
||||||
|
|
||||||
|
public extension String {
  /// Treats the string as a raw input-token macro and expands it.
  ///
  /// Every token produced by `InputToken.parse(from:)` is translated, the
  /// candidate lists are concatenated in order, and the combined list is
  /// passed through `deduplicated`.
  ///
  /// - Parameter isCHS: `true` for Simplified Chinese output, `false` for Traditional Chinese.
  /// - Returns: The expanded candidates; empty when the parser yields no token.
  func parseAsInputToken(isCHS: Bool) -> [String] {
    let tokens = InputToken.parse(from: self)
    let candidates = tokens.flatMap { token in
      token.translated(isCHS: isCHS)
    }
    return candidates.deduplicated
  }
}
|
||||||
|
|
||||||
|
// MARK: - Parser parsing raw token value to construct token.
|
||||||
|
|
||||||
|
public extension InputToken {
  /// Parses a raw macro string such as `MACRO@DATE_DAYDELTA:-1_SHORTENED`
  /// into the tokens it stands for.
  ///
  /// The text after the `MACRO@` prefix is split on `_`. The first component
  /// names the token kind (`TIMEZONE`, `TIME`, `DATE`, `WEEK`, `YEAR`); the
  /// remaining components are either flags (`SHORTENED`, `LUNA`, `GANZHI`,
  /// `ZODIAC`) or `NAME:integer` parameters (`DAYDELTA`, `YEARDELTA`).
  /// Components are matched case-insensitively, parameter names included.
  ///
  /// - Parameter rawToken: The raw macro string.
  /// - Returns: The parsed tokens; empty when the prefix is missing or the
  ///   token kind is unknown. A `YEAR` macro always yields the Gregorian year
  ///   first, followed by the zodiac and stem-branch variants when requested.
  static func parse(from rawToken: String) -> [InputToken] {
    guard rawToken.prefix(6) == "MACRO@" else { return [] }
    let tokenComponents = rawToken.dropFirst(6).split(separator: "_").map { $0.uppercased() }
    guard let tokenKind = tokenComponents.first else { return [] }

    // Collect `NAME:integer` parameters. Keys are taken from the uppercased
    // components so that they match the uppercased lookups below regardless
    // of how the macro author capitalised them.
    var mapParams: [String: Int] = [:]
    for component in tokenComponents {
      let kvPair = component.split(separator: ":")
      guard kvPair.count == 2, let pairValue = Int(kvPair[1]) else { continue }
      mapParams[String(kvPair[0])] = pairValue
    }

    // Receive the parameters and flags.
    let dayDelta: Int = mapParams["DAYDELTA"] ?? 0
    let yearDelta: Int = mapParams["YEARDELTA"] ?? 0
    let shortened: Bool = tokenComponents.contains("SHORTENED")
    let hasZodiac: Bool = tokenComponents.contains("ZODIAC")
    let hasGanzhi: Bool = tokenComponents.contains("GANZHI")
    let hasLuna: Bool = tokenComponents.contains("LUNA")

    var result: [InputToken] = []
    switch tokenKind {
    case "TIMEZONE":
      result.append(.timeZone(shortened: shortened))
    case "TIME":
      result.append(.timeNow(shortened: shortened))
    case "DATE":
      result.append(.date(dayDelta: dayDelta, yearDelta: yearDelta, shortened: shortened, luna: hasLuna))
    case "WEEK":
      result.append(.week(dayDelta: dayDelta, shortened: shortened))
    case "YEAR":
      // The Gregorian year is always inserted, as a reference for comparison.
      result.append(.year(yearDelta: yearDelta))
      if hasZodiac { result.append(.yearZodiac(yearDelta: yearDelta)) }
      if hasGanzhi { result.append(.yearGanzhi(yearDelta: yearDelta)) }
    default:
      break
    }
    return result
  }
}
|
||||||
|
|
||||||
|
// MARK: - Parser parsing token itself.
|
||||||
|
|
||||||
|
public extension InputToken {
  /// Expands this token into candidate strings describing "now", shifted by
  /// the token's deltas.
  ///
  /// - Parameter isCHS: `true` to format with the `zh-Hans` locale and
  ///   Simplified Chinese wording, `false` for `zh-Hant-TW` and Traditional
  ///   Chinese wording.
  /// - Returns: The candidate strings in preference order. May be empty for
  ///   `.yearZodiac` when the cyclic year name is not found in the zodiac table.
  func translated(isCHS: Bool) -> [String] {
    let locale = Locale(identifier: isCHS ? "zh-Hans" : "zh-Hant-TW")
    let formatter = DateFormatter()
    formatter.locale = locale
    let currentDate = Date()
    var dateToDescribe = currentDate // Shifted below according to the token's deltas.
    var results: [String] = []

    /// Shifts `dateToDescribe` by `deltaValue` units of `type`.
    /// Only `.year` and `.day` are supported; other components are ignored.
    func applyDelta(for type: Calendar.Component, delta deltaValue: Int) {
      let calendar = Calendar.current
      switch type {
      case .year:
        var delta = DateComponents()
        let thisYear = calendar.dateComponents([.year], from: currentDate).year ?? 2018
        // Never step back by more years than the current year number.
        delta.year = max(deltaValue, thisYear * -1)
        dateToDescribe = calendar.date(byAdding: delta, to: dateToDescribe) ?? dateToDescribe
      case .day:
        // Calendar arithmetic keeps the wall-clock time, so a DST transition
        // (a 23- or 25-hour day) cannot push the result onto the wrong day,
        // which adding a fixed 86400 seconds per day could.
        if let shifted = calendar.date(byAdding: .day, value: deltaValue, to: dateToDescribe) {
          dateToDescribe = shifted
        } else {
          dateToDescribe = dateToDescribe.addingTimeInterval(Double(deltaValue) * 86400)
        }
      default:
        break
      }
    }

    // Compute the results.
    switch self {
    case let .timeZone(shortened): // Time zone
      let resultToAdd = TimeZone.current.localizedName(
        for: shortened ? .shortGeneric : .standard, locale: locale
      ) ?? TimeZone.current.description
      results.append(resultToAdd)

    case let .timeNow(shortened): // Current time
      var formats = [String]()
      switch (isCHS, shortened) {
      case (false, true): formats.append(contentsOf: ["HH:mm", "HH點mm分", "HH時mm分"])
      case (false, false): formats.append(contentsOf: ["HH:mm:ss", "HH點mm分ss秒", "HH時mm分ss秒"])
      case (true, true): formats.append(contentsOf: ["HH:mm", "HH点mm分", "HH时mm分"])
      case (true, false): formats.append(contentsOf: ["HH:mm:ss", "HH点mm分ss秒", "HH时mm分ss秒"])
      }
      for formatString in formats {
        formatter.dateFormat = formatString
        results.append(formatter.string(from: dateToDescribe))
      }
      // Additionally offer Chinese-numeral variants of the non-colon forms.
      let resultsExtra: [String] = results.compactMap {
        guard !$0.contains(":") else { return nil }
        var newResult = $0
        // Drop the leading zero of a single-digit hour.
        if newResult.first == "0" { newResult = String(newResult.dropFirst()) }
        // Two o'clock is rendered with the counting form of "two".
        if newResult.prefix(2) == "2点" || newResult.prefix(2) == "2點" {
          newResult = (isCHS ? "两点" : "兩點") + String(newResult.dropFirst(2))
        }
        newResult = newResult.convertArabicNumeralsToChinese(onlyDigits: false)
        newResult = newResult.replacingOccurrences(of: "〇", with: "零")
        return newResult
      }
      results.append(contentsOf: resultsExtra)

    case let .date(dayDelta, yearDelta, shortened, hasLuna): // Date
      applyDelta(for: .year, delta: yearDelta)
      applyDelta(for: .day, delta: dayDelta)
      // The Chinese (lunar) calendar is handled separately.
      if hasLuna {
        formatter.calendar = .init(identifier: .chinese)
        formatter.dateStyle = .medium
        formatter.dateFormat = "MMMd"
        let dateString = formatter.string(from: dateToDescribe)
        formatter.dateFormat = "U"
        let yearGanzhi = formatter.string(from: dateToDescribe)
        results.append("\(yearGanzhi)年\(dateString)")
        if let yearZodiac = mapGanzhiToZodiac[yearGanzhi] {
          results.append("\(isCHS ? yearZodiac.1 : yearZodiac.0)年\(dateString)")
        }
        break
      }
      let formats: [String] = [
        "MM-dd", "M月d日", "MM月dd日",
      ]
      var additionalResult: String?
      for (i, formatString) in formats.enumerated() {
        formatter.dateFormat = formatString
        let dateStr = formatter.string(from: dateToDescribe)
        // The first format is dash-separated; the others use the year suffix.
        switch (i == 0, shortened) {
        case (false, true): formatter.dateFormat = "yy年"
        case (true, false): formatter.dateFormat = "y-"
        case (false, false): formatter.dateFormat = "y年"
        case (true, true): formatter.dateFormat = "yy-"
        }
        let yearStr = formatter.string(from: dateToDescribe)
        if i == 1 {
          // Build a Chinese-numeral variant from the unpadded format.
          let anotherDateStr = dateStr.convertArabicNumeralsToChinese(onlyDigits: false)
          let anotherYearStr = yearStr.convertArabicNumeralsToChinese(onlyDigits: true)
          additionalResult = anotherYearStr + anotherDateStr
        }
        let newResult = yearStr + dateStr
        guard !results.contains(newResult) else { continue }
        results.append(newResult)
      }
      if let additionalResult = additionalResult {
        results.append(additionalResult)
      }

    case let .week(dayDelta, shortened): // Day of the week
      applyDelta(for: .day, delta: dayDelta)
      formatter.dateFormat = shortened ? "EE" : "EEEE"
      results.append(formatter.string(from: dateToDescribe))

    case let .year(yearDelta): // Year
      applyDelta(for: .year, delta: yearDelta)
      formatter.dateFormat = "U年"
      formatter.calendar = .init(identifier: .gregorian)
      let result = formatter.string(from: dateToDescribe)
      results.append(result)
      results.append(result.convertArabicNumeralsToChinese(onlyDigits: true))

    case let .yearGanzhi(yearDelta): // Stem-branch (sexagenary) year name
      applyDelta(for: .year, delta: yearDelta)
      formatter.dateFormat = "U年"
      formatter.calendar = .init(identifier: .chinese)
      let result = formatter.string(from: dateToDescribe)
      results.append(result)

    case let .yearZodiac(yearDelta): // Twelve zodiac animals
      applyDelta(for: .year, delta: yearDelta)
      formatter.dateFormat = "U"
      formatter.calendar = .init(identifier: .chinese)
      let rawKey = formatter.string(from: dateToDescribe)
      guard let rawResultPair = mapGanzhiToZodiac[rawKey] else { break }
      let rawResult = isCHS ? rawResultPair.1 : rawResultPair.0
      results.append(rawResult + "年")
    }

    return results
  }
}
|
||||||
|
|
||||||
|
/// Maps each of the 60 stem-branch year names to its element + zodiac animal
/// name as `(Traditional Chinese, Simplified Chinese)`.
///
/// Note 1: The stems and branches themselves are written identically in
/// Simplified and Traditional Chinese, so the keys serve both.
/// Note 2: This follows the Tibetan yin-yang five-element zodiac scheme,
/// not the Turkic five-element "nayin" zodiac scheme.
private let mapGanzhiToZodiac: [String: (String, String)] = {
  // Each heavenly stem paired with its element.
  let stems: [(name: String, element: String)] = [
    ("甲", "木"), ("乙", "木"), ("丙", "火"), ("丁", "火"), ("戊", "土"),
    ("己", "土"), ("庚", "金"), ("辛", "金"), ("壬", "水"), ("癸", "水"),
  ]
  // Each earthly branch paired with its animal (Traditional, Simplified).
  let branches: [(name: String, cht: String, chs: String)] = [
    ("子", "鼠", "鼠"), ("丑", "牛", "牛"), ("寅", "虎", "虎"), ("卯", "兔", "兔"),
    ("辰", "龍", "龙"), ("巳", "蛇", "蛇"), ("午", "馬", "马"), ("未", "羊", "羊"),
    ("申", "猴", "猴"), ("酉", "雞", "鸡"), ("戌", "狗", "狗"), ("亥", "豬", "猪"),
  ]
  var table: [String: (String, String)] = [:]
  // Walking both cycles in lockstep yields exactly the 60 valid pairings.
  for position in 0 ..< 60 {
    let stem = stems[position % stems.count]
    let branch = branches[position % branches.count]
    table[stem.name + branch.name] = (stem.element + branch.cht, stem.element + branch.chs)
  }
  return table
}()
|
||||||
|
|
||||||
|
// MARK: - Date Time Language Conversion Extension
|
||||||
|
|
||||||
|
/// Lookup table from the decimal text of 0...60 to the spelled-out Chinese numeral.
private let tableMappingArabicDatesToChinese: [String: String] = {
  let spellOutFormatter = NumberFormatter()
  // The default would be English; use Chinese. Traditional and Simplified agree here.
  spellOutFormatter.locale = Locale(identifier: "zh-Hant-TW")
  spellOutFormatter.numberStyle = .spellOut
  return (0 ... 60).reduce(into: [String: String]()) { table, number in
    // Numbers the formatter cannot spell out are simply left out of the table.
    guard let spelled = spellOutFormatter.string(from: NSNumber(value: number)) else { return }
    table[number.description] = spelled
  }
}()
|
||||||
|
|
||||||
|
private extension String {
  /// Returns a copy of the string with Arabic numerals replaced by Chinese numerals.
  ///
  /// Replacement is a plain substring substitution driven by
  /// `tableMappingArabicDatesToChinese`: longer keys are applied before
  /// shorter ones so that, for example, `12` is replaced as a whole rather
  /// than as `1` followed by `2`.
  ///
  /// - Parameter onlyDigits: When `true`, only single-digit keys are applied,
  ///   so every digit is converted individually (used for year numbers).
  ///   When `false`, multi-digit keys are applied as well.
  /// - Returns: The converted string; the receiver is not modified.
  func convertArabicNumeralsToChinese(onlyDigits: Bool) -> String {
    let orderedKeys = tableMappingArabicDatesToChinese.keys
      .filter { !onlyDigits || $0.count <= 1 }
      .sorted { lhs, rhs in
        // Longest first; ties are broken lexicographically so that the
        // outcome does not depend on the dictionary's per-process ordering.
        lhs.count != rhs.count ? lhs.count > rhs.count : lhs < rhs
      }
    var target = self
    for key in orderedKeys {
      guard let replacement = tableMappingArabicDatesToChinese[key] else { continue }
      target = target.replacingOccurrences(of: key, with: replacement)
    }
    return target
  }
}
|
|
@ -343,14 +343,26 @@ public extension vChewingLM {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 新增與日期、時間、星期有關的單元圖資料
|
// 分析且處理可能存在的 InputToken。
|
||||||
|
rawAllUnigrams = rawAllUnigrams.map { unigram in
|
||||||
|
let convertedValues = unigram.value.parseAsInputToken(isCHS: isCHS)
|
||||||
|
guard !convertedValues.isEmpty else { return [unigram] }
|
||||||
|
var result = [Megrez.Unigram]()
|
||||||
|
convertedValues.enumerated().forEach { absDelta, value in
|
||||||
|
let newScore: Double = -80 - Double(absDelta) * 0.01
|
||||||
|
result.append(.init(value: value, score: newScore))
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}.flatMap { $0 }
|
||||||
|
|
||||||
|
// 新增與日期、時間、星期有關的單元圖資料。
|
||||||
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
|
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
|
||||||
|
|
||||||
if keyChain == "_punctuation_list" {
|
if keyChain == "_punctuation_list" {
|
||||||
rawAllUnigrams.append(contentsOf: getHaninSymbolMenuUnigrams())
|
rawAllUnigrams.append(contentsOf: getHaninSymbolMenuUnigrams())
|
||||||
}
|
}
|
||||||
|
|
||||||
// 提前處理語彙置換
|
// 提前處理語彙置換。
|
||||||
if isPhraseReplacementEnabled {
|
if isPhraseReplacementEnabled {
|
||||||
for i in 0 ..< rawAllUnigrams.count {
|
for i in 0 ..< rawAllUnigrams.count {
|
||||||
let newValue = lmReplacements.valuesFor(key: rawAllUnigrams[i].value)
|
let newValue = lmReplacements.valuesFor(key: rawAllUnigrams[i].value)
|
||||||
|
|
|
@ -13,85 +13,70 @@ import Megrez
|
||||||
|
|
||||||
extension vChewingLM.LMInstantiator {
|
extension vChewingLM.LMInstantiator {
|
||||||
func queryDateTimeUnigrams(with key: String = "") -> [Megrez.Unigram] {
|
func queryDateTimeUnigrams(with key: String = "") -> [Megrez.Unigram] {
|
||||||
if !["ㄖˋ-ㄑㄧ", "ㄖˋ-ㄑㄧˊ", "ㄕˊ-ㄐㄧㄢ", "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ"].contains(key) { return .init() }
|
guard let tokenTrigger = TokenTrigger(rawValue: key) else { return [] }
|
||||||
var results = [Megrez.Unigram]()
|
var results = [Megrez.Unigram]()
|
||||||
let theLocale = Locale(identifier: "zh-Hant")
|
var tokens: [String] = []
|
||||||
let currentDate = Date()
|
|
||||||
var delta = DateComponents()
|
func processDateWithDayDelta(_ delta: Int) {
|
||||||
let thisYear = Calendar.current.dateComponents([.year], from: currentDate).year ?? 2018
|
tokens = ["MACRO@DATE_DAYDELTA:\(delta)"]
|
||||||
delta.year = max(min(deltaOfCalendarYears, 0), thisYear * -1)
|
if deltaOfCalendarYears != 0 { tokens.append("MACRO@DATE_DAYDELTA:\(delta)_YEARDELTA:\(deltaOfCalendarYears)") }
|
||||||
let currentDateShortened = Calendar.current.date(byAdding: delta, to: currentDate)
|
tokens.append("MACRO@DATE_DAYDELTA:\(delta)_SHORTENED")
|
||||||
switch key {
|
tokens.append("MACRO@DATE_DAYDELTA:\(delta)_LUNA")
|
||||||
case "ㄖˋ-ㄑㄧ", "ㄖˋ-ㄑㄧˊ":
|
|
||||||
let formatterDate1 = DateFormatter()
|
|
||||||
let formatterDate2 = DateFormatter()
|
|
||||||
formatterDate1.dateFormat = "yyyy-MM-dd"
|
|
||||||
formatterDate2.dateFormat = "yyyy年MM月dd日"
|
|
||||||
let date1 = formatterDate1.string(from: currentDate)
|
|
||||||
let date2 = formatterDate2.string(from: currentDate)
|
|
||||||
var date3 = date2.convertArabicNumeralsToChinese
|
|
||||||
date3 = date3.replacingOccurrences(of: "年〇", with: "年")
|
|
||||||
date3 = date3.replacingOccurrences(of: "月〇", with: "月")
|
|
||||||
results.append(.init(value: date1, score: -94))
|
|
||||||
results.append(.init(value: date2, score: -95))
|
|
||||||
results.append(.init(value: date3, score: -96))
|
|
||||||
if let currentDateShortened = currentDateShortened, delta.year != 0 {
|
|
||||||
var dateAlt1: String = formatterDate1.string(from: currentDateShortened)
|
|
||||||
dateAlt1.regReplace(pattern: #"^0+"#)
|
|
||||||
var dateAlt2: String = formatterDate2.string(from: currentDateShortened)
|
|
||||||
dateAlt2.regReplace(pattern: #"^0+"#)
|
|
||||||
var dateAlt3 = dateAlt2.convertArabicNumeralsToChinese
|
|
||||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年")
|
|
||||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月")
|
|
||||||
results.append(.init(value: dateAlt1, score: -97))
|
|
||||||
results.append(.init(value: dateAlt2, score: -98))
|
|
||||||
results.append(.init(value: dateAlt3, score: -99))
|
|
||||||
}
|
}
|
||||||
case "ㄕˊ-ㄐㄧㄢ":
|
|
||||||
let formatterTime1 = DateFormatter()
|
func processYearWithYearDelta(_ delta: Int) {
|
||||||
let formatterTime2 = DateFormatter()
|
tokens = ["MACRO@YEAR_YEARDELTA:\(delta)"]
|
||||||
let formatterTime3 = DateFormatter()
|
if deltaOfCalendarYears != 0 { tokens.append("MACRO@YEAR_YEARDELTA:\(delta + deltaOfCalendarYears)") }
|
||||||
formatterTime1.dateFormat = "HH:mm"
|
tokens.append("MACRO@YEAR_GANZHI_YEARDELTA:\(delta)")
|
||||||
formatterTime2.dateFormat = isCHS ? "HH点mm分" : "HH點mm分"
|
tokens.append("MACRO@YEAR_ZODIAC_YEARDELTA:\(delta)")
|
||||||
formatterTime3.dateFormat = isCHS ? "HH时mm分" : "HH時mm分"
|
}
|
||||||
let time1 = formatterTime1.string(from: currentDate)
|
|
||||||
let time2 = formatterTime2.string(from: currentDate)
|
switch tokenTrigger {
|
||||||
let time3 = formatterTime3.string(from: currentDate)
|
case .jin1tian1ri4qi2, .jin1tian1ri4qi1: processDateWithDayDelta(0) // 今天日期
|
||||||
results.append(.init(value: time1, score: -97))
|
case .zuo2tian1ri4qi2, .zuo2tian1ri4qi1: processDateWithDayDelta(-1) // 昨天日期
|
||||||
results.append(.init(value: time2, score: -98))
|
case .qian2tian1ri4qi2, .qian2tian1ri4qi1: processDateWithDayDelta(-2) // 前天日期
|
||||||
results.append(.init(value: time3, score: -99))
|
case .ming2tian1ri4qi2, .ming2tian1ri4qi1: processDateWithDayDelta(1) // 明天日期
|
||||||
case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ":
|
case .hou4tian1ri4qi1, .hou4tian1ri4qi2: processDateWithDayDelta(2) // 後天日期
|
||||||
let formatterWeek1 = DateFormatter()
|
case .jin1nian2nian2du4: processYearWithYearDelta(0) // 今年年度
|
||||||
let formatterWeek2 = DateFormatter()
|
case .qu4nian2nian2du4: processYearWithYearDelta(-1) // 去年年度
|
||||||
formatterWeek1.dateFormat = "EEEE"
|
case .qian2nian2nian2du4: processYearWithYearDelta(-2) // 前年年度
|
||||||
formatterWeek2.dateFormat = "EE"
|
case .ming2nian2nian2du4: processYearWithYearDelta(1) // 明年年度
|
||||||
formatterWeek1.locale = theLocale
|
case .hou4nian2nian2du4: processYearWithYearDelta(2) // 後年年度
|
||||||
formatterWeek2.locale = theLocale
|
case .shi2jian1: tokens = ["MACRO@TIME_SHORTENED"] // 時間
|
||||||
let week1 = formatterWeek1.string(from: currentDate)
|
case .xing1qi1, .xing1qi2: tokens = ["MACRO@WEEK_SHORTENED", "MACRO@WEEK"] // 星期
|
||||||
let week2 = formatterWeek2.string(from: currentDate)
|
case .suo3zai4shi2qu1, .dang1qian2shi2qu1, .mu4qian2shi2qu1: tokens = ["MACRO@TIMEZONE", "MACRO@TIMEZONE_SHORTENED"] // 時區
|
||||||
results.append(.init(value: week1, score: -98))
|
}
|
||||||
results.append(.init(value: week2, score: -99))
|
// 終末處理。
|
||||||
default: return .init()
|
let values = tokens.map { $0.parseAsInputToken(isCHS: isCHS) }.flatMap { $0 }.deduplicated
|
||||||
|
var i: Double = -99
|
||||||
|
for strValue in values.reversed() {
|
||||||
|
results.insert(.init(value: strValue, score: i), at: 0)
|
||||||
|
i += 1
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Date Time Language Conversion Extension
|
private enum TokenTrigger: String {
|
||||||
|
case shi2jian1 = "ㄕˊ-ㄐㄧㄢ"
|
||||||
private let tableMappingArabicNumeralsToChinese: [String: String] = [
|
case xing1qi1 = "ㄒㄧㄥ-ㄑㄧ"
|
||||||
"0": "〇", "1": "一", "2": "二", "3": "三", "4": "四", "5": "五", "6": "六", "7": "七", "8": "八", "9": "九",
|
case xing1qi2 = "ㄒㄧㄥ-ㄑㄧˊ"
|
||||||
]
|
case jin1nian2nian2du4 = "ㄐㄧㄣ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||||
|
case qu4nian2nian2du4 = "ㄑㄩˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||||
private extension String {
|
case ming2nian2nian2du4 = "ㄇㄧㄥˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||||
/// 將給定的字串當中的阿拉伯數字轉為漢語小寫,逐字轉換。
|
case qian2nian2nian2du4 = "ㄑㄧㄢˊ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||||
/// - Parameter target: 要進行轉換操作的對象,會直接修改該對象。
|
case hou4nian2nian2du4 = "ㄏㄡˋ-ㄋㄧㄢˊ-ㄋㄧㄢˊ-ㄉㄨˋ"
|
||||||
var convertArabicNumeralsToChinese: String {
|
case jin1tian1ri4qi2 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||||
var target = self
|
case ming2tian1ri4qi2 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||||
for key in tableMappingArabicNumeralsToChinese.keys {
|
case zuo2tian1ri4qi2 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||||
guard let result = tableMappingArabicNumeralsToChinese[key] else { continue }
|
case qian2tian1ri4qi2 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||||
target = target.replacingOccurrences(of: key, with: result)
|
case hou4tian1ri4qi2 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧˊ"
|
||||||
}
|
case jin1tian1ri4qi1 = "ㄐㄧㄣ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||||
return target
|
case ming2tian1ri4qi1 = "ㄇㄧㄥˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||||
}
|
case zuo2tian1ri4qi1 = "ㄗㄨㄛˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||||
|
case qian2tian1ri4qi1 = "ㄑㄧㄢˊ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||||
|
case hou4tian1ri4qi1 = "ㄏㄡˋ-ㄊㄧㄢ-ㄖˋ-ㄑㄧ"
|
||||||
|
case dang1qian2shi2qu1 = "ㄉㄤ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
|
||||||
|
case mu4qian2shi2qu1 = "ㄇㄨˋ-ㄑㄧㄢˊ-ㄕˊ-ㄑㄩ"
|
||||||
|
case suo3zai4shi2qu1 = "ㄙㄨㄛˇ-ㄗㄞˋ-ㄕˊ-ㄑㄩ"
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
//// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
|
// ====================
|
||||||
|
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
|
||||||
|
// ... with NTL restriction stating that:
|
||||||
|
// No trademark license is granted to use the trade names, trademarks, service
|
||||||
|
// marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
// requirements defined in MIT License.
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import XCTest
|
||||||
|
|
||||||
|
@testable import LangModelAssembly
|
||||||
|
|
||||||
|
/// Smoke tests for macro expansion through `String.parseAsInputToken(isCHS:)`.
///
/// The expansions depend on the current date, time zone and locale data, so
/// exact strings cannot be pinned here. Each case logs the candidates for
/// manual inspection and asserts that the macro produced at least one.
final class InputTokenTests: XCTestCase {
  /// Expands `token`, prints the candidates after `label`, and asserts that
  /// the expansion is not empty.
  private func checkExpansion(_ label: String, _ token: String, isCHS: Bool) {
    let candidates = token.parseAsInputToken(isCHS: isCHS)
    print(label + candidates.description)
    XCTAssertFalse(candidates.isEmpty, "\(token) produced no candidates")
  }

  /// Strings that are not recognised macros must expand to nothing.
  func testTranslatingTokens_0_NonMacro() throws {
    XCTAssertTrue("".parseAsInputToken(isCHS: false).isEmpty)
    XCTAssertTrue("DATE".parseAsInputToken(isCHS: false).isEmpty)
    XCTAssertTrue("MACRO@".parseAsInputToken(isCHS: false).isEmpty)
    XCTAssertTrue("MACRO@UNKNOWN".parseAsInputToken(isCHS: true).isEmpty)
  }

  func testTranslatingTokens_1_TimeZone() throws {
    checkExpansion("測試時區俗稱:", "MACRO@TIMEZONE_SHORTENED", isCHS: false)
    checkExpansion("測試時區全稱:", "MACRO@TIMEZONE", isCHS: false)
  }

  func testTranslatingTokens_2_TimeNow() throws {
    checkExpansion("測試時間時分:", "MACRO@TIME_SHORTENED", isCHS: false)
    checkExpansion("測試帶秒時間:", "MACRO@TIME", isCHS: true)
  }

  func testTranslatingTokens_3_Date() throws {
    checkExpansion("測試農曆:", "MACRO@DATE_LUNA", isCHS: true)
    checkExpansion("測試二戰勝利紀年:", "MACRO@DATE_YEARDELTA:-1945", isCHS: true)
    checkExpansion("測試短日期之135天前:", "MACRO@DATE_DAYDELTA:-135_SHORTENED", isCHS: true)
    checkExpansion("測試長日期之135天前:", "MACRO@DATE_DAYDELTA:-135", isCHS: true)
    checkExpansion("測試短日期之今天:", "MACRO@DATE_SHORTENED", isCHS: true)
    checkExpansion("測試長日期之今天:", "MACRO@DATE", isCHS: true)
    checkExpansion("測試短日期之明天:", "MACRO@DATE_SHORTENED_DAYDELTA:1", isCHS: true)
    checkExpansion("測試長日期之明天:", "MACRO@DATE_DAYDELTA:1", isCHS: true)
    checkExpansion("測試短日期之明年:", "MACRO@DATE_SHORTENED_YEARDELTA:1", isCHS: true)
    checkExpansion("測試長日期之明年:", "MACRO@DATE_YEARDELTA:1", isCHS: true)
  }

  func testTranslatingTokens_4_Week() throws {
    checkExpansion("測試今天星期幾:", "MACRO@WEEK", isCHS: false)
    checkExpansion("測試今天週幾:", "MACRO@WEEK_SHORTENED", isCHS: false)
    checkExpansion("測試明天星期幾:", "MACRO@WEEK_DAYDELTA:1", isCHS: false)
    checkExpansion("測試明天週幾:", "MACRO@WEEK_SHORTENED_DAYDELTA:1", isCHS: false)
    checkExpansion("測試後天星期幾:", "MACRO@WEEK_DAYDELTA:+2", isCHS: false)
    checkExpansion("測試後天週幾:", "MACRO@WEEK_SHORTENED_DAYDELTA:+2", isCHS: false)
  }

  func testTranslatingTokens_5_Year() throws {
    checkExpansion("測試今年:", "MACRO@YEAR", isCHS: false)
    checkExpansion("測試今年干支:", "MACRO@YEAR_GANZHI", isCHS: false)
    checkExpansion("測試今年生肖:", "MACRO@YEAR_ZODIAC", isCHS: false)
    checkExpansion("測試一千年以前:", "MACRO@YEAR_YEARDELTA:-1000", isCHS: false)
    checkExpansion("測試一千年以前干支:", "MACRO@YEAR_GANZHI_YEARDELTA:-1000", isCHS: false)
    checkExpansion("測試一千年以前生肖:", "MACRO@YEAR_ZODIAC_YEARDELTA:-1000", isCHS: false)
    checkExpansion("測試一千年以後:", "MACRO@YEAR_YEARDELTA:1000", isCHS: false)
    checkExpansion("測試一千年以後干支:", "MACRO@YEAR_GANZHI_YEARDELTA:1000", isCHS: false)
    checkExpansion("測試一千年以後生肖:", "MACRO@YEAR_ZODIAC_YEARDELTA:1000", isCHS: false)
  }
}
|
Loading…
Reference in New Issue