diff --git a/Source/Modules/ControllerModules/SyllableComposer.swift b/Source/Modules/ControllerModules/SyllableComposer.swift index a9b5d26e..e6f00eca 100644 --- a/Source/Modules/ControllerModules/SyllableComposer.swift +++ b/Source/Modules/ControllerModules/SyllableComposer.swift @@ -922,7 +922,8 @@ public struct Tekkon { /// - newToneOne: 對陰平指定新的標記。預設情況下該標記為空字串。 /// - Returns: 轉換結果。 static func cnvHanyuPinyinToPhona(target: String, newToneOne: String = "") -> String { - if target.contains("_") { return target } + /// Returns the input unconverted when it contains "_" or when `isNotPureAlphanumeral` is false (nothing but ASCII letters and digits). NOTE(review): if the goal is to skip input that contains anything other than ASCII letters and digits, the `!` below is inverted - confirm against the callers' key format. + if target.contains("_") || !target.isNotPureAlphanumeral { return target } var result = target for key in Tekkon.mapHanyuPinyin.keys.sorted(by: { $0.count > $1.count }) { guard let value = Tekkon.mapHanyuPinyin[key] else { continue } @@ -1446,3 +1447,12 @@ public struct Tekkon { "s": "ㄙ", "t": "ㄊ", "u": "ㄡ", "v": "ㄩ", "w": "ㄨ", "x": "ㄒ", "y": "ㄧ", "z": "ㄗ", " ": " ", ] } + +/// Adds `isNotPureAlphanumeral`, which matches the whole string against `.*[^A-Za-z0-9].*` and so reports whether it holds a character other than A-Z, a-z, 0-9. +extension String { + fileprivate var isNotPureAlphanumeral: Bool { + let regex = ".*[^A-Za-z0-9].*" + let testString = NSPredicate(format: "SELF MATCHES %@", regex) + return testString.evaluate(with: self) + } +} diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 3cc7405e..24e52104 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -77,8 +77,15 @@ public enum IME { // mgrLangModel 的 loadUserPhrases 等函式在自動讀取 dataFolderPath 時, // 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。 // 所以這裡不需要特別處理。 - mgrLangModel.loadUserAssociatesData() - mgrLangModel.loadUserPhraseReplacement() + if mgrPrefs.associatedPhrasesEnabled { + mgrLangModel.loadUserAssociatesData() + } + if mgrPrefs.phraseReplacementEnabled { + mgrLangModel.loadUserPhraseReplacement() + } + if mgrPrefs.useSCPCTypingMode { + mgrLangModel.loadUserSCPCSequencesData() + } mgrLangModel.loadUserPhrasesData() if !userOnly { // mgrLangModel.loadDataModels() diff --git a/Source/Modules/IMEModules/mgrPrefs.swift
b/Source/Modules/IMEModules/mgrPrefs.swift index 4b1e4232..541e5ff2 100644 --- a/Source/Modules/IMEModules/mgrPrefs.swift +++ b/Source/Modules/IMEModules/mgrPrefs.swift @@ -388,15 +388,6 @@ public enum mgrPrefs { mgrPrefs.allowBoostingSingleKanjiAsUserPhrase ? 1 : 2 } - @UserDefault(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false) - static var useSCPCTypingMode: Bool - - static func toggleSCPCTypingModeEnabled() -> Bool { - useSCPCTypingMode = !useSCPCTypingMode - UserDefaults.standard.set(useSCPCTypingMode, forKey: UserDef.kUseSCPCTypingMode.rawValue) - return useSCPCTypingMode - } - @UserDefault(key: UserDef.kMaxCandidateLength.rawValue, defaultValue: 10) static var maxCandidateLength: Int @@ -564,8 +555,29 @@ public enum mgrPrefs { } } + @UserDefault(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false) + static var useSCPCTypingMode: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserSCPCSequencesData() + } + } + } + + static func toggleSCPCTypingModeEnabled() -> Bool { + useSCPCTypingMode = !useSCPCTypingMode + UserDefaults.standard.set(useSCPCTypingMode, forKey: UserDef.kUseSCPCTypingMode.rawValue) + return useSCPCTypingMode + } + @UserDefault(key: UserDef.kPhraseReplacementEnabled.rawValue, defaultValue: false) - static var phraseReplacementEnabled: Bool + static var phraseReplacementEnabled: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserPhraseReplacement() + } + } + } static func togglePhraseReplacementEnabled() -> Bool { phraseReplacementEnabled = !phraseReplacementEnabled @@ -575,7 +587,13 @@ public enum mgrPrefs { } @UserDefault(key: UserDef.kAssociatedPhrasesEnabled.rawValue, defaultValue: false) - static var associatedPhrasesEnabled: Bool + static var associatedPhrasesEnabled: Bool { + willSet { + DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) { + mgrLangModel.loadUserAssociatesData() + } + } + } static 
func toggleAssociatedPhrasesEnabled() -> Bool { associatedPhrasesEnabled = !associatedPhrasesEnabled diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index e0c3c681..ad8f742e 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -76,6 +76,7 @@ extension vChewing { ) var lmReplacements = LMReplacments() var lmAssociates = LMAssociates() + var lmPlainBopomofo = LMPlainBopomofo() // MARK: - 工具函式 @@ -166,6 +167,16 @@ extension vChewing { } } + public func loadUserSCPCSequencesData(path: String) { + if FileManager.default.isReadableFile(atPath: path) { + lmPlainBopomofo.close() + lmPlainBopomofo.open(path) + IME.prtDebugIntel("lmPlainBopomofo: \(lmPlainBopomofo.count) entries of data loaded from: \(path)") + } else { + IME.prtDebugIntel("lmPlainBopomofo: File access failure: \(path)") + } + } + // MARK: - 核心函式(對外) /// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。 @@ -181,6 +192,11 @@ extension vChewing { /// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。 var rawAllUnigrams: [Megrez.Unigram] = [] + // When SCPC typing mode is on, values that lmPlainBopomofo holds for this key are appended first (each with score 0), ahead of everything added below. + if mgrPrefs.useSCPCTypingMode { + rawAllUnigrams += lmPlainBopomofo.valuesFor(key: key).map { Megrez.Unigram.init(value: $0, score: 0) } + } + // 用 reversed 指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 // 將兩句差分也是為了讓 rawUserUnigrams 的類型不受可能的影響。 diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index be170656..8dfc1ce6 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -32,7 +32,7 @@ extension vChewing { } let arrTarget = target.dropLast().dropFirst().split(separator: ",") guard arrTarget.count == 2 else { return target } - return "(\(Tekkon.cnvHanyuPinyinToPhona(target: String(arrTarget[0]))),\(arrTarget[1]))" + return
"(\(Tekkon.cnvHanyuPinyinToPhona(target: String(arrTarget[0]).lowercased())),\(arrTarget[1]))" } @discardableResult public mutating func open(_ path: String) -> Bool { diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 5879eb95..12fe3627 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -77,7 +77,7 @@ extension vChewing { if !neta[0].isEmpty, !neta[1].isEmpty { let theKey = shouldReverse ? String(neta[1]) : String(neta[0]) let theValue = $0 - rangeMap[Tekkon.cnvHanyuPinyinToPhona(target: theKey), default: []].append(theValue) + rangeMap[Tekkon.cnvHanyuPinyinToPhona(target: theKey.lowercased()), default: []].append(theValue) } } } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift b/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift new file mode 100644 index 00000000..356bcce3 --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLMs/lmPlainBopomofo.swift @@ -0,0 +1,83 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) +// ... with NTL restriction stating that: +// No trademark license is granted to use the trade names, trademarks, service +// marks, or product names of Contributor, except as required to fulfill notice +// requirements defined in MIT License. 
+ +import Foundation + +extension vChewing { + @frozen public struct LMPlainBopomofo { + var rangeMap: [String: String] = [:] + + public var count: Int { + rangeMap.count + } + + public init() { + rangeMap = [:] + } + + public func isLoaded() -> Bool { + !rangeMap.isEmpty + } + + @discardableResult public mutating func open(_ path: String) -> Bool { + if isLoaded() { + return false + } + + do { + let rawData = try Data(contentsOf: URL(fileURLWithPath: path)) + let rawPlist: [String: String] = + try PropertyListSerialization.propertyList(from: rawData, format: nil) as? [String: String] ?? .init() + rangeMap = rawPlist + } catch { + IME.prtDebugIntel("\(error)") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") + return false + } + + return true + } + + public mutating func close() { + if isLoaded() { + rangeMap.removeAll() + } + } + + public func dump() { + // Intentionally left empty in order to reduce our maintenance workload. + // This function will be implemented only if further hard-necessity comes. + } + + public func valuesFor(key: String) -> [String] { + var pairs: [String] = [] + if let arrRangeRecords: String = rangeMap[key] { + pairs.append(contentsOf: arrRangeRecords.map({ String($0) })) + } + var set = Set<String>() + return pairs.filter { set.insert($0).inserted } + } + + public func hasValuesFor(key: String) -> Bool { rangeMap.keys.contains(key) } + } +} + +// MARK: - StringView Ranges Extension (by Isaac Xen) + +extension String { + fileprivate func ranges(splitBy separator: Element) -> [Range] { + var startIndex = startIndex + return split(separator: separator).reduce(into: []) { ranges, substring in + _ = range(of: substring, range: startIndex.. URL { + let fileName = (mode == InputMode.imeModeCHT) ?
"data-plain-bpmf-cht.plist" : "data-plain-bpmf-chs.plist" + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) + } + /// 使用者波浪符號選單資料路徑。 /// - Returns: 資料路徑(URL)。 static func userSymbolNodeDataURL() -> URL { @@ -311,6 +328,7 @@ enum mgrLangModel { userAssociatesDataURL(mode), populateWithTemplate: mode == .imeModeCHS ? kTemplateNameUserAssociatesCHS : kTemplateNameUserAssociatesCHT ) + || !ensureFileExists(userSCPCSequencesURL(mode)) || !ensureFileExists(userFilteredDataURL(mode), populateWithTemplate: kTemplateNameUserExclusions) || !ensureFileExists(userReplacementsDataURL(mode), populateWithTemplate: kTemplateNameUserReplacements) || !ensureFileExists(userSymbolDataURL(mode), populateWithTemplate: kTemplateNameUserSymbolPhrases) diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index f9c7d8ea..a6ba5c00 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -117,6 +117,7 @@ 5BF9DA2A28840E6200DBD48E /* template-replacements.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5BF9DA2528840E6200DBD48E /* template-replacements.txt */; }; 5BF9DA2B28840E6200DBD48E /* template-userphrases.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5BF9DA2628840E6200DBD48E /* template-userphrases.txt */; }; 5BF9DA2D288427E000DBD48E /* template-associatedPhrases-cht.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5BF9DA2C2884247800DBD48E /* template-associatedPhrases-cht.txt */; }; + 5BF9EC1628A2BFC600333639 /* lmPlainBopomofo.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BF9EC1528A2BFC600333639 /* lmPlainBopomofo.swift */; }; 5BFDF011289635C100417BBC /* ctlCandidateIMK.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BFDF010289635C100417BBC /* ctlCandidateIMK.swift */; }; 6A187E2616004C5900466B2E /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6A187E2816004C5900466B2E /* MainMenu.xib */; }; 6A225A1F23679F2600F685C6 /* 
NotarizedArchives in Resources */ = {isa = PBXBuildFile; fileRef = 6A225A1E23679F2600F685C6 /* NotarizedArchives */; }; @@ -333,6 +334,7 @@ 5BF9DA2528840E6200DBD48E /* template-replacements.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; lineEnding = 0; path = "template-replacements.txt"; sourceTree = ""; usesTabs = 0; }; 5BF9DA2628840E6200DBD48E /* template-userphrases.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; lineEnding = 0; path = "template-userphrases.txt"; sourceTree = ""; usesTabs = 0; }; 5BF9DA2C2884247800DBD48E /* template-associatedPhrases-cht.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; lineEnding = 0; name = "template-associatedPhrases-cht.txt"; path = "../Data/components/cht/template-associatedPhrases-cht.txt"; sourceTree = ""; }; + 5BF9EC1528A2BFC600333639 /* lmPlainBopomofo.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmPlainBopomofo.swift; sourceTree = ""; }; 5BFDF010289635C100417BBC /* ctlCandidateIMK.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ctlCandidateIMK.swift; sourceTree = ""; }; 5BFDF48C27B51867009523B6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Main.strings"; sourceTree = ""; }; 6A0D4EA215FC0D2D00ABF4B3 /* vChewing.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = vChewing.app; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -432,6 +434,7 @@ 5B407309281672610023DFFF /* lmAssociates.swift */, 5B887F2F2826AEA400B6651E /* lmCoreEX.swift */, 5B54E742283A7D89001ECBDC /* lmCoreNS.swift */, + 5BF9EC1528A2BFC600333639 /* lmPlainBopomofo.swift */, 5B40730A281672610023DFFF /* lmReplacements.swift */, 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */, ); @@ -1177,6 +1180,7 @@ 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */, 
5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */, 5B2170E1289FACAD00BE7304 /* 0_Megrez.swift in Sources */, + 5BF9EC1628A2BFC600333639 /* lmPlainBopomofo.swift in Sources */, 5B3A87BC28597CDB0090E163 /* LMSymbolNode.swift in Sources */, 5BA9FD4327FEF3C8002DE248 /* Preferences.swift in Sources */, 5BA9FD4427FEF3C8002DE248 /* SegmentedControlStyleViewController.swift in Sources */,