From 2116a24dc9ef0217e63631b0cee1063224ce9533 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 10 Aug 2022 08:57:12 +0800 Subject: [PATCH 1/6] Repo // Tekkon v1.2.9 update. --- .../Modules/ControllerModules/SyllableComposer.swift | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Source/Modules/ControllerModules/SyllableComposer.swift b/Source/Modules/ControllerModules/SyllableComposer.swift index a9b5d26e..e6f00eca 100644 --- a/Source/Modules/ControllerModules/SyllableComposer.swift +++ b/Source/Modules/ControllerModules/SyllableComposer.swift @@ -922,7 +922,8 @@ public struct Tekkon { /// - newToneOne: 對陰平指定新的標記。預設情況下該標記為空字串。 /// - Returns: 轉換結果。 static func cnvHanyuPinyinToPhona(target: String, newToneOne: String = "") -> String { - if target.contains("_") { return target } + /// 如果當前內容有任何除了半形英數內容以外的內容的話,就直接放棄轉換。 + if target.contains("_") || !target.isNotPureAlphanumeral { return target } var result = target for key in Tekkon.mapHanyuPinyin.keys.sorted(by: { $0.count > $1.count }) { guard let value = Tekkon.mapHanyuPinyin[key] else { continue } @@ -1446,3 +1447,12 @@ public struct Tekkon { "s": "ㄙ", "t": "ㄊ", "u": "ㄡ", "v": "ㄩ", "w": "ㄨ", "x": "ㄒ", "y": "ㄧ", "z": "ㄗ", " ": " ", ] } + +/// 檢測字串是否包含半形英數內容 +extension String { + fileprivate var isNotPureAlphanumeral: Bool { + let regex = ".*[^A-Za-z0-9].*" + let testString = NSPredicate(format: "SELF MATCHES %@", regex) + return testString.evaluate(with: self) + } +} From fc093f51e9d0ce4f5a99e98ec489f45e38353f33 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 10 Aug 2022 00:13:33 +0800 Subject: [PATCH 2/6] mgrLM // Add userSCPCSequencesData-related contents. --- .../LangModelRelated/mgrLangModel.swift | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 3fc48028..d6f721f8 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -175,6 +175,15 @@ enum mgrLangModel { ) } + public static func loadUserSCPCSequencesData() { + gLangModelCHT.loadUserSCPCSequencesData( + path: mgrLangModel.userSCPCSequencesURL(InputMode.imeModeCHT).path + ) + gLangModelCHS.loadUserSCPCSequencesData( + path: mgrLangModel.userSCPCSequencesURL(InputMode.imeModeCHS).path + ) + } + public static func checkIfUserPhraseExist( userPhrase: String, mode: InputMode, @@ -256,6 +265,14 @@ enum mgrLangModel { return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } + /// 使用者逐字選字模式候選字詞順序資料路徑。 + /// - Parameter mode: 簡繁體輸入模式。 + /// - Returns: 資料路徑(URL)。 + static func userSCPCSequencesURL(_ mode: InputMode) -> URL { + let fileName = (mode == InputMode.imeModeCHT) ? "data-plain-bpmf-cht.plist" : "data-plain-bpmf-chs.plist" + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) + } + /// 使用者波浪符號選單資料路徑。 /// - Returns: 資料路徑(URL)。 static func userSymbolNodeDataURL() -> URL { @@ -311,6 +328,7 @@ enum mgrLangModel { userAssociatesDataURL(mode), populateWithTemplate: mode == .imeModeCHS ? kTemplateNameUserAssociatesCHS : kTemplateNameUserAssociatesCHT ) + || !ensureFileExists(userSCPCSequencesURL(mode)) || !ensureFileExists(userFilteredDataURL(mode), populateWithTemplate: kTemplateNameUserExclusions) || !ensureFileExists(userReplacementsDataURL(mode), populateWithTemplate: kTemplateNameUserReplacements) || !ensureFileExists(userSymbolDataURL(mode), populateWithTemplate: kTemplateNameUserSymbolPhrases) From 12952782522088c32ca99829f786710491890cd5 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 10 Aug 2022 00:14:40 +0800 Subject: [PATCH 3/6] IME // +mgrLangModel.loadUserSCPCSequencesData(). --- Source/Modules/IMEModules/IME.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 3cc7405e..4108c016 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -79,6 +79,7 @@ public enum IME { // 所以這裡不需要特別處理。 mgrLangModel.loadUserAssociatesData() mgrLangModel.loadUserPhraseReplacement() + mgrLangModel.loadUserSCPCSequencesData() mgrLangModel.loadUserPhrasesData() if !userOnly { // mgrLangModel.loadDataModels() From 5468aad6630624bd71df748d041269616355eeda Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 10 Aug 2022 00:06:27 +0800 Subject: [PATCH 4/6] Repo // Deploy LMPlainBopomofo. --- .../LangModelRelated/LMInstantiator.swift | 16 ++++ .../SubLMs/lmPlainBopomofo.swift | 83 +++++++++++++++++++ vChewing.xcodeproj/project.pbxproj | 4 + 3 files changed, 103 insertions(+) create mode 100644 Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index e0c3c681..ad8f742e 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -76,6 +76,7 @@ extension vChewing { ) var lmReplacements = LMReplacments() var lmAssociates = LMAssociates() + var lmPlainBopomofo = LMPlainBopomofo() // MARK: - 工具函式 @@ -166,6 +167,16 @@ extension vChewing { } } + public func loadUserSCPCSequencesData(path: String) { + if FileManager.default.isReadableFile(atPath: path) { + lmPlainBopomofo.close() + lmPlainBopomofo.open(path) + IME.prtDebugIntel("lmPlainBopomofo: \(lmPlainBopomofo.count) entries of data loaded from: \(path)") + } else { + IME.prtDebugIntel("lmPlainBopomofo: File access failure: \(path)") + } + } + // MARK: - 核心函式(對外) /// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。 @@ -181,6 +192,11 @@ extension vChewing { /// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。 var rawAllUnigrams: [Megrez.Unigram] = [] + // 如果有檢測到使用者自訂逐字選字語料庫內的相關資料的話,在這裡先插入。 + if mgrPrefs.useSCPCTypingMode { + rawAllUnigrams += lmPlainBopomofo.valuesFor(key: key).map { Megrez.Unigram.init(value: $0, score: 0) } + } + // 用 reversed 指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 // 將兩句差分也是為了讓 rawUserUnigrams 的類型不受可能的影響。 diff --git a/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift b/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift new file mode 100644 index 00000000..356bcce3 --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift @@ -0,0 +1,83 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) +// ... with NTL restriction stating that: +// No trademark license is granted to use the trade names, trademarks, service +// marks, or product names of Contributor, except as required to fulfill notice +// requirements defined in MIT License. + +import Foundation + +extension vChewing { + @frozen public struct LMPlainBopomofo { + var rangeMap: [String: String] = [:] + + public var count: Int { + rangeMap.count + } + + public init() { + rangeMap = [:] + } + + public func isLoaded() -> Bool { + !rangeMap.isEmpty + } + + @discardableResult public mutating func open(_ path: String) -> Bool { + if isLoaded() { + return false + } + + do { + let rawData = try Data(contentsOf: URL(fileURLWithPath: path)) + let rawPlist: [String: String] = + try PropertyListSerialization.propertyList(from: rawData, format: nil) as? [String: String] ?? .init() + rangeMap = rawPlist + } catch { + IME.prtDebugIntel("\(error)") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") + return false + } + + return true + } + + public mutating func close() { + if isLoaded() { + rangeMap.removeAll() + } + } + + public func dump() { + // We remove this function in order to reduce out maintenance workload. + // This function will be implemented only if further hard-necessity comes. + } + + public func valuesFor(key: String) -> [String] { + var pairs: [String] = [] + if let arrRangeRecords: String = rangeMap[key] { + pairs.append(contentsOf: arrRangeRecords.map({ String($0) })) + } + var set = Set() + return pairs.filter { set.insert($0).inserted } + } + + public func hasValuesFor(key: String) -> Bool { rangeMap.keys.contains(key) } + } +} + +// MARK: - StringView Ranges Extension (by Isaac Xen) + +extension String { + fileprivate func ranges(splitBy separator: Element) -> [Range] { + var startIndex = startIndex + return split(separator: separator).reduce(into: []) { ranges, substring in + _ = range(of: substring, range: startIndex.. Date: Wed, 10 Aug 2022 09:00:07 +0800 Subject: [PATCH 5/6] LMs // Ensuring Hanyu-Pinyin keys are read in lowercase. --- Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift | 2 +- Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index be170656..8dfc1ce6 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -32,7 +32,7 @@ extension vChewing { } let arrTarget = target.dropLast().dropFirst().split(separator: ",") guard arrTarget.count == 2 else { return target } - return "(\(Tekkon.cnvHanyuPinyinToPhona(target: String(arrTarget[0]))),\(arrTarget[1]))" + return "(\(Tekkon.cnvHanyuPinyinToPhona(target: String(arrTarget[0]).lowercased())),\(arrTarget[1]))" } @discardableResult public mutating func open(_ path: String) -> Bool { diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 5879eb95..12fe3627 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -77,7 +77,7 @@ extension vChewing { if !neta[0].isEmpty, !neta[1].isEmpty { let theKey = shouldReverse ? String(neta[1]) : String(neta[0]) let theValue = $0 - rangeMap[Tekkon.cnvHanyuPinyinToPhona(target: theKey), default: []].append(theValue) + rangeMap[Tekkon.cnvHanyuPinyinToPhona(target: theKey.lowercased()), default: []].append(theValue) } } } From 6b3227ad98bf3cfe72da6212088362fe51e748d2 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 10 Aug 2022 09:06:23 +0800 Subject: [PATCH 6/6] Repo // Lazy-load certain user-space language models. --- Source/Modules/IMEModules/IME.swift | 12 +++++-- Source/Modules/IMEModules/mgrPrefs.swift | 40 +++++++++++++++++------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 4108c016..24e52104 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -77,9 +77,15 @@ public enum IME { // mgrLangModel 的 loadUserPhrases 等函式在自動讀取 dataFolderPath 時, // 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。 // 所以這裡不需要特別處理。 - mgrLangModel.loadUserAssociatesData() - mgrLangModel.loadUserPhraseReplacement() - mgrLangModel.loadUserSCPCSequencesData() + if mgrPrefs.associatedPhrasesEnabled { + mgrLangModel.loadUserAssociatesData() + } + if mgrPrefs.phraseReplacementEnabled { + mgrLangModel.loadUserPhraseReplacement() + } + if mgrPrefs.useSCPCTypingMode { + mgrLangModel.loadUserSCPCSequencesData() + } mgrLangModel.loadUserPhrasesData() if !userOnly { // mgrLangModel.loadDataModels() diff --git a/Source/Modules/IMEModules/mgrPrefs.swift b/Source/Modules/IMEModules/mgrPrefs.swift index 4b1e4232..541e5ff2 100644 --- a/Source/Modules/IMEModules/mgrPrefs.swift +++ b/Source/Modules/IMEModules/mgrPrefs.swift @@ -388,15 +388,6 @@ public enum mgrPrefs { mgrPrefs.allowBoostingSingleKanjiAsUserPhrase ? 1 : 2 } - @UserDefault(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false) - static var useSCPCTypingMode: Bool - - static func toggleSCPCTypingModeEnabled() -> Bool { - useSCPCTypingMode = !useSCPCTypingMode - UserDefaults.standard.set(useSCPCTypingMode, forKey: UserDef.kUseSCPCTypingMode.rawValue) - return useSCPCTypingMode - } - @UserDefault(key: UserDef.kMaxCandidateLength.rawValue, defaultValue: 10) static var maxCandidateLength: Int @@ -564,8 +555,29 @@ public enum mgrPrefs { } } + @UserDefault(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false) + static var useSCPCTypingMode: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserSCPCSequencesData() + } + } + } + + static func toggleSCPCTypingModeEnabled() -> Bool { + useSCPCTypingMode = !useSCPCTypingMode + UserDefaults.standard.set(useSCPCTypingMode, forKey: UserDef.kUseSCPCTypingMode.rawValue) + return useSCPCTypingMode + } + @UserDefault(key: UserDef.kPhraseReplacementEnabled.rawValue, defaultValue: false) - static var phraseReplacementEnabled: Bool + static var phraseReplacementEnabled: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserPhraseReplacement() + } + } + } static func togglePhraseReplacementEnabled() -> Bool { phraseReplacementEnabled = !phraseReplacementEnabled @@ -575,7 +587,13 @@ public enum mgrPrefs { } @UserDefault(key: UserDef.kAssociatedPhrasesEnabled.rawValue, defaultValue: false) - static var associatedPhrasesEnabled: Bool + static var associatedPhrasesEnabled: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserAssociatesData() + } + } + } static func toggleAssociatedPhrasesEnabled() -> Bool { associatedPhrasesEnabled = !associatedPhrasesEnabled