diff --git a/Source/Data b/Source/Data index 1b67116c..97d518ca 160000 --- a/Source/Data +++ b/Source/Data @@ -1 +1 @@ -Subproject commit 1b67116c77dd654f156c43754694ac3a17a19a3a +Subproject commit 97d518cac19c96c5cd397bbdbcf8f95d2e967e73 diff --git a/Source/Modules/ControllerModules/InputSignal.swift b/Source/Modules/ControllerModules/InputSignal.swift index a6f6a423..fa4e2d2c 100644 --- a/Source/Modules/ControllerModules/InputSignal.swift +++ b/Source/Modules/ControllerModules/InputSignal.swift @@ -134,17 +134,15 @@ struct InputSignal: CustomStringConvertible { private var extraChooseCandidateKeyReverse: KeyCode = .kNone private var absorbedArrowKey: KeyCode = .kNone private var verticalTypingOnlyChooseCandidateKey: KeyCode = .kNone - private(set) var emacsKey: vChewingEmacsKey + private(set) var emacsKey: EmacsKey public init( inputText: String?, keyCode: UInt16, charCode: UInt16, flags: NSEvent.ModifierFlags, isVerticalTyping: Bool, inputTextIgnoringModifiers: String? = nil ) { - let inputText = AppleKeyboardConverter.cnvStringApple2ABC(inputText ?? "") - let inputTextIgnoringModifiers = AppleKeyboardConverter.cnvStringApple2ABC( - inputTextIgnoringModifiers ?? inputText) - self.inputText = inputText - self.inputTextIgnoringModifiers = inputTextIgnoringModifiers + self.inputText = AppleKeyboardConverter.cnvStringApple2ABC(inputText ?? "") + self.inputTextIgnoringModifiers = AppleKeyboardConverter.cnvStringApple2ABC( + inputTextIgnoringModifiers ?? inputText ?? "") self.flags = flags isFlagChanged = false isTypingVertical = isVerticalTyping @@ -163,7 +161,7 @@ struct InputSignal: CustomStringConvertible { event.charactersIgnoringModifiers ?? "") keyCode = event.keyCode flags = event.modifierFlags - isFlagChanged = (event.type == .flagsChanged) ? true : false + isFlagChanged = (event.type == .flagsChanged) isTypingVertical = isVerticalTyping let charCode: UInt16 = { // 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。 @@ -230,14 +228,14 @@ struct InputSignal: CustomStringConvertible { flags.contains([.control]) && inputText?.first?.isLetter ?? false } - var isOptionHotKey: Bool { - flags.contains([.option]) && inputText?.first?.isLetter ?? false - } - var isOptionHold: Bool { flags.contains([.option]) } + var isOptionHotKey: Bool { + flags.contains([.option]) && inputText?.first?.isLetter ?? false + } + var isCapsLockOn: Bool { flags.contains([.capsLock]) } @@ -334,7 +332,7 @@ struct InputSignal: CustomStringConvertible { KeyCode(rawValue: keyCode) == extraChooseCandidateKeyReverse } - var isverticalTypingOnlyChooseCandidateKey: Bool { + var isVerticalTypingOnlyChooseCandidateKey: Bool { KeyCode(rawValue: keyCode) == verticalTypingOnlyChooseCandidateKey } @@ -350,7 +348,7 @@ struct InputSignal: CustomStringConvertible { } } -enum vChewingEmacsKey: UInt16 { +enum EmacsKey: UInt16 { case none = 0 case forward = 6 // F case backward = 2 // B @@ -361,10 +359,10 @@ enum vChewingEmacsKey: UInt16 { } enum EmacsKeyHelper { - static func detect(charCode: UniChar, flags: NSEvent.ModifierFlags) -> vChewingEmacsKey { + static func detect(charCode: UniChar, flags: NSEvent.ModifierFlags) -> EmacsKey { let charCode = AppleKeyboardConverter.cnvApple2ABC(charCode) if flags.contains(.control) { - return vChewingEmacsKey(rawValue: charCode) ?? .none + return EmacsKey(rawValue: charCode) ?? .none } return .none } diff --git a/Source/Modules/ControllerModules/InputState.swift b/Source/Modules/ControllerModules/InputState.swift index ee285409..9a6a2df1 100644 --- a/Source/Modules/ControllerModules/InputState.swift +++ b/Source/Modules/ControllerModules/InputState.swift @@ -137,7 +137,7 @@ class InputState { class NotEmpty: InputState { private(set) var composingBuffer: String private(set) var cursorIndex: Int = 0 { didSet { cursorIndex = max(cursorIndex, 0) } } - var composingBufferConverted: String { + public var composingBufferConverted: String { let converted = IME.kanjiConversionIfRequired(composingBuffer) if converted.utf16.count != composingBuffer.utf16.count || converted.count != composingBuffer.count @@ -153,10 +153,10 @@ class InputState { defer { self.cursorIndex = cursorIndex } } - var attributedString: NSAttributedString { + var attributedString: NSMutableAttributedString { /// 考慮到因為滑鼠點擊等其它行為導致的組字區內容遞交情況, /// 這裡對組字區內容也加上康熙字轉換或者 JIS 漢字轉換處理。 - let attributedString = NSAttributedString( + let attributedString = NSMutableAttributedString( string: composingBufferConverted, attributes: [ .underlineStyle: NSUnderlineStyle.single.rawValue, @@ -270,7 +270,7 @@ class InputState { defer { self.markerIndex = markerIndex } } - override var attributedString: NSAttributedString { + override var attributedString: NSMutableAttributedString { /// 考慮到因為滑鼠點擊等其它行為導致的組字區內容遞交情況, /// 這裡對組字區內容也加上康熙字轉換或者 JIS 漢字轉換處理。 let attributedString = NSMutableAttributedString(string: composingBufferConverted) @@ -392,8 +392,8 @@ class InputState { // 會出現符號選字窗無法響應方向鍵的問題。 // 如有誰要修奇摩注音的一點通選單的話,修復原理也是一樣的。 // Crediting Qwertyyb: https://github.com/qwertyyb/Fire/issues/55#issuecomment-1133497700 - override var attributedString: NSAttributedString { - let attributedString = NSAttributedString( + override var attributedString: NSMutableAttributedString { + let attributedString = NSMutableAttributedString( string: " ", attributes: [ .underlineStyle: NSUnderlineStyle.single.rawValue, diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 83e7a27f..31c918b5 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -156,8 +156,8 @@ class KeyHandler { /// 不會是 nil,但那些負責接收結果的函式會對空白陣列結果做出正確的處理。 func buildAssociatePhraseArray(withKey key: String) -> [String] { var arrResult: [String] = [] - if currentLM.hasAssociatedPhrasesForKey(key) { - arrResult.append(contentsOf: currentLM.associatedPhrasesForKey(key)) + if currentLM.hasAssociatedPhrasesFor(key: key) { + arrResult.append(contentsOf: currentLM.associatedPhrasesFor(key: key)) } return arrResult } @@ -170,37 +170,39 @@ class KeyHandler { func fixNode(value: String, respectCursorPushing: Bool = true) { let cursorIndex = min(actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength) compositor.grid.fixNodeSelectedCandidate(location: cursorIndex, value: value) - // // 因半衰模組失能,故禁用之。 - // let selectedNode: Megrez.NodeAnchor = compositor.grid.fixNodeSelectedCandidate( - // location: cursorIndex, value: value - // ) - // // 不要針對逐字選字模式啟用臨時半衰記憶模型。 - // if !mgrPrefs.useSCPCTypingMode { - // // 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。 - // var addToUserOverrideModel = true - // if selectedNode.spanningLength != value.count { - // IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.") - // addToUserOverrideModel = false - // } - // if addToUserOverrideModel { - // if let theNode = selectedNode.node { - // // 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。 - // if theNode.scoreFor(candidate: value) <= -12 { - // IME.prtDebugIntel("UOM: Score <= -12, dismissing.") - // addToUserOverrideModel = false - // } - // } - // } - // if addToUserOverrideModel { - // IME.prtDebugIntel("UOM: Start Observation.") - // // 令半衰記憶模組觀測給定的 trigram。 - // // 這個過程會讓半衰引擎根據當前上下文生成 trigram 索引鍵。 - // currentUOM.observe( - // walkedNodes: walkedAnchors, cursorIndex: cursorIndex, candidate: value, - // timestamp: NSDate().timeIntervalSince1970 - // ) - // } - // } + // 開始讓半衰模組觀察目前的狀況。 + let selectedNode: Megrez.NodeAnchor = compositor.grid.fixNodeSelectedCandidate( + location: cursorIndex, value: value + ) + // 不要針對逐字選字模式啟用臨時半衰記憶模型。 + if !mgrPrefs.useSCPCTypingMode { + // 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。 + var addToUserOverrideModel = true + if selectedNode.spanningLength != value.count { + IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.") + addToUserOverrideModel = false + } + if addToUserOverrideModel { + if let theNode = selectedNode.node { + // 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。 + if theNode.scoreFor(candidate: value) <= -12 { + IME.prtDebugIntel("UOM: Score <= -12, dismissing.") + addToUserOverrideModel = false + } + } + } + if addToUserOverrideModel { + IME.prtDebugIntel("UOM: Start Observation.") + // 令半衰記憶模組觀測給定的 trigram。 + // 這個過程會讓半衰引擎根據當前上下文生成 trigram 索引鍵。 + currentUOM.observe( + walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, candidate: value, + timestamp: NSDate().timeIntervalSince1970 + ) + } + } + + // 開始爬軌。 walk() /// 若偏好設定內啟用了相關選項,則會在選字之後始終將游標推送至選字厚的節錨的前方。 @@ -237,40 +239,54 @@ class KeyHandler { /// 獲取候選字詞陣列資料內容。 var candidatesArray: [String] { + var arrNodes: [Megrez.NodeAnchor] = rawNodes var arrCandidates: [String] = [] - var arrNodes: [Megrez.NodeAnchor] = [] - arrNodes.append(contentsOf: rawNodes) /// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。 /// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。 /// 這個過程不會傷到子陣列內部的排序。 - if !arrNodes.isEmpty { - // sort the nodes, so that longer nodes (representing longer phrases) - // are placed at the top of the candidate list - arrNodes.sort { $0.keyLength > $1.keyLength } + if arrNodes.isEmpty { return arrCandidates } - // then use the Swift trick to retrieve the candidates for each node at/crossing the cursor - for currentNodeAnchor in arrNodes { - if let currentNode = currentNodeAnchor.node { - for currentCandidate in currentNode.candidates { - arrCandidates.append(currentCandidate.value) - } + // sort the nodes, so that longer nodes (representing longer phrases) + // are placed at the top of the candidate list + arrNodes = arrNodes.stableSort { $0.keyLength > $1.keyLength } + + // then use the Swift trick to retrieve the candidates for each node at/crossing the cursor + for currentNodeAnchor in arrNodes { + if let currentNode = currentNodeAnchor.node { + for currentCandidate in currentNode.candidates { + // 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。 + // 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。 + // 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。 + arrCandidates.append(currentCandidate.value) } } } + if mgrPrefs.fetchSuggestionsFromUserOverrideModel, !mgrPrefs.useSCPCTypingMode { + let arrSuggestedUnigrams: [Megrez.Unigram] = fetchSuggestedCandidates().stableSort { $0.score > $1.score } + let arrSuggestedCandidates: [String] = arrSuggestedUnigrams.map { $0.keyValue.value } + arrCandidates = arrSuggestedCandidates.filter { arrCandidates.contains($0) } + arrCandidates + arrCandidates = arrCandidates.deduplicate + arrCandidates = arrCandidates.stableSort { $0.count > $1.count } + } return arrCandidates } - /// 向半衰引擎詢問可能的選字建議。 - func dealWithOverrideModelSuggestions() { + /// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列。 + func fetchSuggestedCandidates() -> [Megrez.Unigram] { + currentUOM.suggest( + walkedAnchors: walkedAnchors, cursorIndex: compositorCursorIndex, + timestamp: NSDate().timeIntervalSince1970) + } + + /// 向半衰引擎詢問可能的選字建議、且套用給組字器內的當前游標位置。 + func fetchAndApplySuggestionsFromUserOverrideModel() { + /// 如果逐字選字模式有啟用的話,直接放棄執行這個函式。 + if mgrPrefs.useSCPCTypingMode { return } + /// 如果這個開關沒打開的話,直接放棄執行這個函式。 + if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return } /// 先就當前上下文讓半衰引擎重新生成 trigram 索引鍵。 - let overrideValue = - mgrPrefs.useSCPCTypingMode - ? "" - : currentUOM.suggest( - walkedNodes: walkedAnchors, cursorIndex: compositorCursorIndex, - timestamp: NSDate().timeIntervalSince1970 - ) + let overrideValue = fetchSuggestedCandidates().first?.keyValue.value ?? "" /// 再拿著索引鍵去問半衰模組有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。 if !overrideValue.isEmpty { diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift b/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift index 1f8462fb..a861c828 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift @@ -122,7 +122,7 @@ extension KeyHandler { // MARK: PgDn - if input.isPageDown || input.emacsKey == vChewingEmacsKey.nextPage { + if input.isPageDown || input.emacsKey == EmacsKey.nextPage { let updated: Bool = ctlCandidateCurrent.showNextPage() if !updated { IME.prtDebugIntel("9B691919") @@ -166,7 +166,7 @@ extension KeyHandler { // MARK: EmacsKey Backward - if input.emacsKey == vChewingEmacsKey.backward { + if input.emacsKey == EmacsKey.backward { let updated: Bool = ctlCandidateCurrent.highlightPreviousCandidate() if !updated { IME.prtDebugIntel("9B89308D") @@ -199,7 +199,7 @@ extension KeyHandler { // MARK: EmacsKey Forward - if input.emacsKey == vChewingEmacsKey.forward { + if input.emacsKey == EmacsKey.forward { let updated: Bool = ctlCandidateCurrent.highlightNextCandidate() if !updated { IME.prtDebugIntel("9B2428D") @@ -254,7 +254,7 @@ extension KeyHandler { // MARK: Home Key - if input.isHome || input.emacsKey == vChewingEmacsKey.home { + if input.isHome || input.emacsKey == EmacsKey.home { if ctlCandidateCurrent.selectedCandidateIndex == 0 { IME.prtDebugIntel("9B6EDE8D") errorCallback() @@ -278,7 +278,7 @@ extension KeyHandler { if candidates.isEmpty { return false } else { // 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。 - if input.isEnd || input.emacsKey == vChewingEmacsKey.end { + if input.isEnd || input.emacsKey == EmacsKey.end { if ctlCandidateCurrent.selectedCandidateIndex == candidates.count - 1 { IME.prtDebugIntel("9B69AAAD") errorCallback() diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index 87c21d05..519b6e58 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -83,8 +83,8 @@ extension KeyHandler { /// 如果是 ASCII 當中的不可列印的字元的話,不使用「insertText:replacementRange:」。 /// 某些應用無法正常處理非 ASCII 字符的輸入。 - /// 注意:這裡一定要用 Objective-C 的 isPrintable() 函數來處理,否則無效。 - /// 這個函數已經包裝在 CTools.h 裡面了,這樣就可以拿給 Swift 用。 + /// 注意:這裡一定要用 Objective-C 的 isPrintable() 函式來處理,否則無效。 + /// 這個函式已經包裝在 CTools.h 裡面了,這樣就可以拿給 Swift 用。 if charCode < 0x80, !CTools.isPrintable(charCode) { return false } @@ -175,27 +175,27 @@ extension KeyHandler { // 小麥注音因為使用 OVMandarin,所以不需要這樣補。但鐵恨引擎對所有聲調一視同仁。 composer.receiveKey(fromString: " ") } - let reading = composer.getComposition() // 拿取用來進行索引檢索用的注音 + let reading = composer.getComposition() // 拿取用來進行索引檢索用的注音。 // 如果輸入法的辭典索引是漢語拼音的話,要注意上一行拿到的內容得是漢語拼音。 - // 向語言模型詢問是否有對應的記錄 + // 向語言模型詢問是否有對應的記錄。 if !ifLangModelHasUnigrams(forKey: reading) { IME.prtDebugIntel("B49C0979:語彙庫內無「\(reading)」的匹配記錄。") errorCallback() composer.clear() - // 根據「組字器是否為空」來判定回呼哪一種狀態 + // 根據「組字器是否為空」來判定回呼哪一種狀態。 stateCallback((compositorLength == 0) ? InputState.EmptyIgnoringPreviousState() : buildInputtingState) - return true // 向 IMK 報告說這個按鍵訊號已經被輸入法攔截處理了 + return true // 向 IMK 報告說這個按鍵訊號已經被輸入法攔截處理了。 } - // 將該讀音插入至組字器內的軌格當中 + // 將該讀音插入至組字器內的軌格當中。 insertToCompositorAtCursor(reading: reading) - // 讓組字器反爬軌格 + // 讓組字器反爬軌格。 let textToCommit = popOverflowComposingTextAndWalk - // 看看半衰記憶模組是否會對目前的狀態給出自動選字建議 - // dealWithOverrideModelSuggestions() // 暫時禁用,因為無法使其生效。 + // 看看半衰記憶模組是否會對目前的狀態給出自動選字建議。 + fetchAndApplySuggestionsFromUserOverrideModel() // 將組字器內超出最大動態爬軌範圍的節錨都標記為「已經手動選字過」,減少之後的爬軌運算負擔。 markNodesFixedIfNecessary() @@ -257,7 +257,7 @@ extension KeyHandler { if let currentState = state as? InputState.NotEmpty, composer.isEmpty, input.isExtraChooseCandidateKey || input.isExtraChooseCandidateKeyReverse || input.isSpace || input.isPageDown || input.isPageUp || (input.isTab && mgrPrefs.specifyShiftTabKeyBehavior) - || (input.isTypingVertical && (input.isverticalTypingOnlyChooseCandidateKey)) + || (input.isTypingVertical && (input.isVerticalTypingOnlyChooseCandidateKey)) { if input.isSpace { /// 倘若沒有在偏好設定內將 Space 空格鍵設為選字窗呼叫用鍵的話……… @@ -305,7 +305,7 @@ extension KeyHandler { // MARK: Cursor backward - if input.isCursorBackward || input.emacsKey == vChewingEmacsKey.backward { + if input.isCursorBackward || input.emacsKey == EmacsKey.backward { return handleBackward( state: state, input: input, @@ -316,7 +316,7 @@ extension KeyHandler { // MARK: Cursor forward - if input.isCursorForward || input.emacsKey == vChewingEmacsKey.forward { + if input.isCursorForward || input.emacsKey == EmacsKey.forward { return handleForward( state: state, input: input, stateCallback: stateCallback, errorCallback: errorCallback ) @@ -324,13 +324,13 @@ extension KeyHandler { // MARK: Home - if input.isHome || input.emacsKey == vChewingEmacsKey.home { + if input.isHome || input.emacsKey == EmacsKey.home { return handleHome(state: state, stateCallback: stateCallback, errorCallback: errorCallback) } // MARK: End - if input.isEnd || input.emacsKey == vChewingEmacsKey.end { + if input.isEnd || input.emacsKey == EmacsKey.end { return handleEnd(state: state, stateCallback: stateCallback, errorCallback: errorCallback) } @@ -360,7 +360,7 @@ extension KeyHandler { // MARK: Delete - if input.isDelete || input.emacsKey == vChewingEmacsKey.delete { + if input.isDelete || input.emacsKey == EmacsKey.delete { return handleDelete(state: state, stateCallback: stateCallback, errorCallback: errorCallback) } diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index 82b76b1d..f40b4762 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -233,7 +233,7 @@ extension KeyHandler { } // Shift + Left - if input.isCursorBackward || input.emacsKey == vChewingEmacsKey.backward, input.isShiftHold { + if input.isCursorBackward || input.emacsKey == EmacsKey.backward, input.isShiftHold { var index = state.markerIndex if index > 0 { index = state.composingBuffer.utf16PreviousPosition(for: index) @@ -254,7 +254,7 @@ extension KeyHandler { } // Shift + Right - if input.isCursorForward || input.emacsKey == vChewingEmacsKey.forward, input.isShiftHold { + if input.isCursorForward || input.emacsKey == EmacsKey.forward, input.isShiftHold { var index = state.markerIndex if index < (state.composingBuffer.utf16.count) { index = state.composingBuffer.utf16NextPosition(for: index) @@ -755,7 +755,8 @@ extension KeyHandler { stateCallback: @escaping (InputState) -> Void, errorCallback: @escaping () -> Void ) -> Bool { - guard let state = state as? InputState.Inputting else { + if composer.isEmpty && (compositor.isEmpty || walkedAnchors.isEmpty) { return false } + guard state is InputState.Inputting else { guard state is InputState.Empty else { IME.prtDebugIntel("6044F081") errorCallback() @@ -772,7 +773,7 @@ extension KeyHandler { } // 此處僅借用該函式生成結果內的某個物件,不用糾結「是否縱排輸入」。 - let candidates = buildCandidate(state: state).candidates + let candidates = candidatesArray guard !candidates.isEmpty else { IME.prtDebugIntel("3378A6DF") errorCallback() diff --git a/Source/Modules/ControllerModules/SyllableComposer.swift b/Source/Modules/ControllerModules/SyllableComposer.swift index 6f9ff3ae..825808ea 100644 --- a/Source/Modules/ControllerModules/SyllableComposer.swift +++ b/Source/Modules/ControllerModules/SyllableComposer.swift @@ -1296,7 +1296,7 @@ public struct Tekkon { /// 倚天忘形排列預處理專用陣列,但未包含全部的映射內容。 /// /// 在這裡將二十六個字母寫全,也只是為了方便做 validity check。 - /// 這裡提前對ㄓ/ㄍ/ㄕ做處理,然後再用程式判斷介母類型、據此判斷是否需要換成ㄒ/ㄑ/ㄐ。 + /// 這裡提前對複音按鍵做處理,然後再用程式判斷介母類型、據此判斷是否需要做複音切換。 static let mapETen26StaticKeys: [String: String] = [ "a": "ㄚ", "b": "ㄅ", "c": "ㄕ", "d": "ㄉ", "e": "ㄧ", "f": "ㄈ", "g": "ㄓ", "h": "ㄏ", "i": "ㄞ", "j": "ㄖ", "k": "ㄎ", "l": "ㄌ", "m": "ㄇ", "n": "ㄋ", "o": "ㄛ", "p": "ㄆ", "q": "ㄗ", "r": "ㄜ", "s": "ㄙ", "t": "ㄊ", "u": "ㄩ", "v": "ㄍ", diff --git a/Source/Modules/ControllerModules/ctlInputMethod_Core.swift b/Source/Modules/ControllerModules/ctlInputMethod_Core.swift index d4ab7717..685093e6 100644 --- a/Source/Modules/ControllerModules/ctlInputMethod_Core.swift +++ b/Source/Modules/ControllerModules/ctlInputMethod_Core.swift @@ -63,8 +63,12 @@ class ctlInputMethod: IMKInputController { client().overrideKeyboard(withKeyboardNamed: mgrPrefs.basicKeyboardLayout) } - /// 重設按鍵調度模組。 + /// 重設按鍵調度模組,會將當前尚未遞交的內容遞交出去。 func resetKeyHandler() { + if let state = state as? InputState.NotEmpty { + /// 將傳回的新狀態交給調度函式。 + handle(state: InputState.Committing(textToCommit: state.composingBufferConverted)) + } keyHandler.clear() handle(state: InputState.Empty()) } @@ -101,7 +105,7 @@ class ctlInputMethod: IMKInputController { /// 必須加上下述條件,否則會在每次切換至輸入法本體的視窗(比如偏好設定視窗)時會卡死。 /// 這是很多 macOS 副廠輸入法的常見失誤之處。 if client().bundleIdentifier() != Bundle.main.bundleIdentifier { - // Override the keyboard layout to the basic one. + // 強制重設當前鍵盤佈局、使其與偏好設定同步。 setKeyLayout() handle(state: .Empty()) } // 除此之外就不要動了,免得在點開輸入法自身的視窗時卡死。 @@ -143,7 +147,7 @@ class ctlInputMethod: IMKInputController { /// 必須加上下述條件,否則會在每次切換至輸入法本體的視窗(比如偏好設定視窗)時會卡死。 /// 這是很多 macOS 副廠輸入法的常見失誤之處。 if client().bundleIdentifier() != Bundle.main.bundleIdentifier { - // Remember to override the keyboard layout again -- treat this as an activate event. + // 強制重設當前鍵盤佈局、使其與偏好設定同步。這裡的這一步也不能省略。 setKeyLayout() handle(state: .Empty()) } // 除此之外就不要動了,免得在點開輸入法自身的視窗時卡死。 @@ -229,10 +233,6 @@ class ctlInputMethod: IMKInputController { /// - Parameter sender: 呼叫了該函式的客體(無須使用)。 override func commitComposition(_ sender: Any!) { _ = sender // 防止格式整理工具毀掉與此對應的參數。 - if let state = state as? InputState.NotEmpty { - /// 將傳回的新狀態交給調度函式。 - handle(state: InputState.Committing(textToCommit: state.composingBuffer)) - } resetKeyHandler() } } @@ -278,6 +278,38 @@ extension ctlInputMethod { clearInlineDisplay() return } + + var identifier: AnyObject { + switch IME.currentInputMode { + case InputMode.imeModeCHS: + if #available(macOS 12.0, *) { + return "zh-Hans" as AnyObject + } + case InputMode.imeModeCHT: + if #available(macOS 12.0, *) { + return (mgrPrefs.shiftJISShinjitaiOutputEnabled || mgrPrefs.chineseConversionEnabled) + ? "ja" as AnyObject : "zh-Hant" as AnyObject + } + default: + break + } + return "" as AnyObject + } + + // [Shiki's Note] This might needs to be bug-reported to Apple: + // The LanguageIdentifier attribute of an NSAttributeString designated to + // IMK Client().SetMarkedText won't let the actual font respect your languageIdentifier + // settings. Still, this might behaves as Apple's current expectation, I'm afraid. + if #available(macOS 12.0, *) { + state.attributedString.setAttributes( + [.languageIdentifier: identifier], + range: NSRange( + location: 0, + length: state.composingBuffer.utf16.count + ) + ) + } + /// 所謂選區「selectionRange」,就是「可見游標位置」的位置,只不過長度 /// 是 0 且取代範圍(replacementRange)為「NSNotFound」罷了。 /// 也就是說,內文組字區該在哪裡出現,得由客體軟體來作主。 @@ -297,6 +329,7 @@ extension ctlInputMethod { } /// 遞交組字區內容。 + /// 注意:必須在 IMK 的 commitComposition 函式當中也間接或者直接執行這個處理。 private func commit(text: String) { let buffer = IME.kanjiConversionIfRequired(text) if buffer.isEmpty { diff --git a/Source/Modules/ControllerModules/ctlInputMethod_Menu.swift b/Source/Modules/ControllerModules/ctlInputMethod_Menu.swift index a35664f9..d1bfd7cf 100644 --- a/Source/Modules/ControllerModules/ctlInputMethod_Menu.swift +++ b/Source/Modules/ControllerModules/ctlInputMethod_Menu.swift @@ -139,6 +139,11 @@ extension ctlInputMethod { ) } + menu.addItem( + withTitle: NSLocalizedString("Optimize Memorized Phrases", comment: ""), + action: #selector(removeUnigramsFromUOM(_:)), keyEquivalent: "" + ) + menu.addItem(NSMenuItem.separator()) // --------------------- if optionKeyPressed { @@ -199,6 +204,7 @@ extension ctlInputMethod { } @objc func toggleSCPCTypingMode(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Per-Char Select Mode", comment: ""), "\n", @@ -206,10 +212,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleChineseConverter(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Force KangXi Writing", comment: ""), "\n", @@ -217,10 +223,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleShiftJISShinjitaiOutput(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("JIS Shinjitai Output", comment: ""), "\n", @@ -228,10 +234,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleHalfWidthPunctuation(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Half-Width Punctuation Mode", comment: ""), @@ -240,10 +246,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleCNS11643Enabled(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("CNS11643 Mode", comment: ""), "\n", @@ -251,10 +257,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleSymbolEnabled(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Symbol & Emoji Input", comment: ""), "\n", @@ -262,10 +268,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func toggleAssociatedPhrasesEnabled(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Per-Char Associated Phrases", comment: ""), @@ -274,10 +280,10 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func togglePhraseReplacement(_: Any?) { + resetKeyHandler() NotifierController.notify( message: String( format: "%@%@%@", NSLocalizedString("Use Phrase Replacement", comment: ""), "\n", @@ -285,7 +291,6 @@ extension ctlInputMethod { ? NSLocalizedString("NotificationSwitchON", comment: "") : NSLocalizedString("NotificationSwitchOFF", comment: "") )) - resetKeyHandler() } @objc func selfUninstall(_: Any?) { @@ -311,38 +316,38 @@ extension ctlInputMethod { } @objc func openUserPhrases(_: Any?) { - IME.openPhraseFile(userFileAt: mgrLangModel.userPhrasesDataPath(IME.getInputMode())) - if NSEvent.modifierFlags.contains(.option), mgrPrefs.isDebugModeEnabled { - IME.openPhraseFile(userFileAt: mgrLangModel.userPhrasesDataPath(IME.getInputMode(isReversed: true))) + IME.openPhraseFile(fromURL: mgrLangModel.userPhrasesDataURL(IME.getInputMode())) + if NSEvent.modifierFlags.contains(.option) { + IME.openPhraseFile(fromURL: mgrLangModel.userPhrasesDataURL(IME.getInputMode(isReversed: true))) } } @objc func openExcludedPhrases(_: Any?) { - IME.openPhraseFile(userFileAt: mgrLangModel.excludedPhrasesDataPath(IME.getInputMode())) - if NSEvent.modifierFlags.contains(.option), mgrPrefs.isDebugModeEnabled { - IME.openPhraseFile(userFileAt: mgrLangModel.excludedPhrasesDataPath(IME.getInputMode(isReversed: true))) + IME.openPhraseFile(fromURL: mgrLangModel.userFilteredDataURL(IME.getInputMode())) + if NSEvent.modifierFlags.contains(.option) { + IME.openPhraseFile(fromURL: mgrLangModel.userFilteredDataURL(IME.getInputMode(isReversed: true))) } } @objc func openUserSymbols(_: Any?) { - IME.openPhraseFile(userFileAt: mgrLangModel.userSymbolDataPath(IME.getInputMode())) - if NSEvent.modifierFlags.contains(.option), mgrPrefs.isDebugModeEnabled { - IME.openPhraseFile(userFileAt: mgrLangModel.userSymbolDataPath(IME.getInputMode(isReversed: true))) + IME.openPhraseFile(fromURL: mgrLangModel.userSymbolDataURL(IME.getInputMode())) + if NSEvent.modifierFlags.contains(.option) { + IME.openPhraseFile(fromURL: mgrLangModel.userSymbolDataURL(IME.getInputMode(isReversed: true))) } } @objc func openPhraseReplacement(_: Any?) { - IME.openPhraseFile(userFileAt: mgrLangModel.phraseReplacementDataPath(IME.getInputMode())) - if NSEvent.modifierFlags.contains(.option), mgrPrefs.isDebugModeEnabled { - IME.openPhraseFile(userFileAt: mgrLangModel.phraseReplacementDataPath(IME.getInputMode(isReversed: true))) + IME.openPhraseFile(fromURL: mgrLangModel.userReplacementsDataURL(IME.getInputMode())) + if NSEvent.modifierFlags.contains(.option) { + IME.openPhraseFile(fromURL: mgrLangModel.userReplacementsDataURL(IME.getInputMode(isReversed: true))) } } @objc func openAssociatedPhrases(_: Any?) { - IME.openPhraseFile(userFileAt: mgrLangModel.userAssociatedPhrasesDataPath(IME.getInputMode())) - if NSEvent.modifierFlags.contains(.option), mgrPrefs.isDebugModeEnabled { + IME.openPhraseFile(fromURL: mgrLangModel.userAssociatesDataURL(IME.getInputMode())) + if NSEvent.modifierFlags.contains(.option) { IME.openPhraseFile( - userFileAt: mgrLangModel.userAssociatedPhrasesDataPath(IME.getInputMode(isReversed: true))) + fromURL: mgrLangModel.userAssociatesDataURL(IME.getInputMode(isReversed: true))) } } @@ -350,6 +355,13 @@ extension ctlInputMethod { IME.initLangModels(userOnly: true) } + @objc func removeUnigramsFromUOM(_: Any?) { + mgrLangModel.removeUnigramsFromUserOverrideModel(IME.getInputMode()) + if NSEvent.modifierFlags.contains(.option) { + mgrLangModel.removeUnigramsFromUserOverrideModel(IME.getInputMode(isReversed: true)) + } + } + @objc func showAbout(_: Any?) { (NSApp.delegate as? AppDelegate)?.showAbout() NSApp.activate(ignoringOtherApps: true) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 043befa1..390250ae 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -48,7 +48,7 @@ public enum IME { switch (mgrPrefs.chineseConversionEnabled, mgrPrefs.shiftJISShinjitaiOutputEnabled) { case (false, true): return vChewingKanjiConverter.cnvTradToJIS(text) case (true, false): return vChewingKanjiConverter.cnvTradToKangXi(text) - // 本來這兩個開關不該同時開啟的,但萬一被開啟了的話就這樣處理: + // 本來這兩個開關不該同時開啟的,但萬一被同時開啟了的話就這樣處理: case (true, true): return vChewingKanjiConverter.cnvTradToJIS(text) case (false, false): return text } @@ -122,6 +122,10 @@ public enum IME { // MARK: - Open a phrase data file. + static func openPhraseFile(fromURL url: URL) { + openPhraseFile(userFileAt: url.path) + } + static func openPhraseFile(userFileAt path: String) { func checkIfUserFilesExist() -> Bool { if !mgrLangModel.chkUserLMFilesExist(InputMode.imeModeCHS) @@ -373,7 +377,7 @@ public enum IME { // Extend the RangeReplaceableCollection to allow it clean duplicated characters. // Ref: https://stackoverflow.com/questions/25738817/ extension RangeReplaceableCollection where Element: Hashable { - var charDeDuplicate: Self { + var deduplicate: Self { var set = Set() return filter { set.insert($0).inserted } } @@ -417,3 +421,25 @@ extension UniChar { return true } } + +// MARK: - Stable Sort Extension + +// Ref: https://stackoverflow.com/a/50545761/4162914 +extension Sequence { + /// Return a stable-sorted collection. + /// + /// - Parameter areInIncreasingOrder: Return nil when two element are equal. + /// - Returns: The sorted collection. + public func stableSort( + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) + rethrows -> [Element] + { + try enumerated() + .sorted { a, b -> Bool in + try areInIncreasingOrder(a.element, b.element) + || (a.offset < b.offset && !areInIncreasingOrder(b.element, a.element)) + } + .map(\.element) + } +} diff --git a/Source/Modules/IMEModules/mgrPrefs.swift b/Source/Modules/IMEModules/mgrPrefs.swift index 28c7adc1..21ee8a22 100644 --- a/Source/Modules/IMEModules/mgrPrefs.swift +++ b/Source/Modules/IMEModules/mgrPrefs.swift @@ -56,6 +56,7 @@ struct UserDef { static let kShouldNotFartInLieuOfBeep = "ShouldNotFartInLieuOfBeep" static let kShowHanyuPinyinInCompositionBuffer = "ShowHanyuPinyinInCompositionBuffer" static let kInlineDumpPinyinInLieuOfZhuyin = "InlineDumpPinyinInLieuOfZhuyin" + static let kFetchSuggestionsFromUserOverrideModel = "FetchSuggestionsFromUserOverrideModel" static let kCandidateTextFontName = "CandidateTextFontName" static let kCandidateKeyLabelFontName = "CandidateKeyLabelFontName" @@ -270,6 +271,9 @@ public enum mgrPrefs { UserDefaults.standard.setDefault( mgrPrefs.allowBoostingSingleKanjiAsUserPhrase, forKey: UserDef.kAllowBoostingSingleKanjiAsUserPhrase ) + UserDefaults.standard.setDefault( + mgrPrefs.fetchSuggestionsFromUserOverrideModel, forKey: UserDef.kFetchSuggestionsFromUserOverrideModel + ) UserDefaults.standard.setDefault(mgrPrefs.usingHotKeySCPC, forKey: UserDef.kUsingHotKeySCPC) UserDefaults.standard.setDefault(mgrPrefs.usingHotKeyAssociates, forKey: UserDef.kUsingHotKeyAssociates) @@ -344,6 +348,9 @@ public enum mgrPrefs { @UserDefault(key: UserDef.kAllowBoostingSingleKanjiAsUserPhrase, defaultValue: false) static var allowBoostingSingleKanjiAsUserPhrase: Bool + @UserDefault(key: UserDef.kFetchSuggestionsFromUserOverrideModel, defaultValue: true) + static var fetchSuggestionsFromUserOverrideModel: Bool + static var minCandidateLength: Int { mgrPrefs.allowBoostingSingleKanjiAsUserPhrase ? 1 : 2 } diff --git a/Source/Modules/LangModelRelated/LMConsolidator.swift b/Source/Modules/LangModelRelated/LMConsolidator.swift index b9392b4f..3e16e6db 100644 --- a/Source/Modules/LangModelRelated/LMConsolidator.swift +++ b/Source/Modules/LangModelRelated/LMConsolidator.swift @@ -28,6 +28,9 @@ extension vChewing { public enum LMConsolidator { public static let kPragmaHeader = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍" + /// 檢查給定檔案的標頭是否正常。 + /// - Parameter path: 給定檔案路徑。 + /// - Returns: 結果正常則為真,其餘為假。 public static func checkPragma(path: String) -> Bool { if FileManager.default.fileExists(atPath: path) { let fileHandle = FileHandle(forReadingAtPath: path)! @@ -51,12 +54,17 @@ extension vChewing { return false } + /// 檢查檔案是否以空行結尾,如果缺失則補充之。 + /// - Parameter path: 給定檔案路徑。 + /// - Returns: 結果正常或修復順利則為真,其餘為假。 @discardableResult public static func fixEOF(path: String) -> Bool { let urlPath = URL(fileURLWithPath: path) if FileManager.default.fileExists(atPath: path) { var strIncoming = "" do { strIncoming += try String(contentsOf: urlPath, encoding: .utf8) + /// 注意:Swift 版 LMConsolidator 並未在此安排對 EOF 的去重複工序。 + /// 但這個函式執行完之後往往就會 consolidate() 整理格式,所以不會有差。 if !strIncoming.hasSuffix("\n") { IME.prtDebugIntel("EOF Fix Necessity Confirmed, Start Fixing.") if let writeFile = FileHandle(forUpdatingAtPath: path), @@ -81,10 +89,14 @@ extension vChewing { return false } + /// 統整給定的檔案的格式。 + /// - Parameters: + /// - path: 給定檔案路徑。 + /// - shouldCheckPragma: 是否在檔案標頭完好無損的情況下略過對格式的整理。 + /// - Returns: 若整理順利或無須整理,則為真;反之為假。 @discardableResult public static func consolidate(path: String, pragma shouldCheckPragma: Bool) -> Bool { - var pragmaResult = false + let pragmaResult = checkPragma(path: path) if shouldCheckPragma { - pragmaResult = checkPragma(path: path) if pragmaResult { return true } @@ -105,6 +117,7 @@ extension vChewing { strProcessed.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") // 去除行尾行首空格 strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") + strProcessed.regReplace(pattern: #"(\n | \n)"#, replaceWith: "\n") // CR & FF to LF, 且去除重複行 strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") if strProcessed.prefix(1) == " " { // 去除檔案開頭空格 @@ -114,21 +127,21 @@ extension vChewing { strProcessed.removeLast() } - // Step 3: Add Formatted Pragma, the Sorted Header: + // Step 2: Add Formatted Pragma, the Sorted Header: if !pragmaResult { strProcessed = kPragmaHeader + "\n" + strProcessed // Add Sorted Header } - // Step 4: Deduplication. + // Step 3: Deduplication. let arrData = strProcessed.split(separator: "\n") // 下面兩行的 reversed 是首尾顛倒,免得破壞最新的 override 資訊。 let arrDataDeduplicated = Array(NSOrderedSet(array: arrData.reversed()).array as! [String]) strProcessed = arrDataDeduplicated.reversed().joined(separator: "\n") + "\n" - // Step 5: Remove duplicated newlines at the end of the file. - strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n") + // Step 4: Remove duplicated newlines at the end of the file. + strProcessed.regReplace(pattern: #"\n+"#, replaceWith: "\n") - // Step 6: Write consolidated file contents. + // Step 5: Write consolidated file contents. try strProcessed.write(to: urlPath, atomically: false, encoding: .utf8) } catch { diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 6ea47680..a8f81404 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -26,19 +26,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import Foundation -// 簡體中文模式與繁體中文模式共用全字庫擴展模組,故單獨處理。 -// 塞在 LMInstantiator 內的話,每個模式都會讀入一份全字庫,會多佔用 100MB 記憶體。 -private var lmCNS = vChewing.LMCoreNS( - reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false -) -private var lmSymbols = vChewing.LMCoreNS( - reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false -) - extension vChewing { /// 語言模組副本化模組(LMInstantiator,下稱「LMI」)自身為符合天權星組字引擎內 - /// 的 LanguageModel 協定的模組、統籌且整理來自其它子模組的資料(包括使用者語彙、 - /// 繪文字模組、語彙濾除表、原廠語言模組等)。 + /// 的 LanguageModelProtocol 協定的模組、統籌且整理來自其它子模組的資料(包括使 + /// 用者語彙、繪文字模組、語彙濾除表、原廠語言模組等)。 /// /// LMI 型別為與輸入法按鍵調度模組直接溝通之唯一語言模組。當組字器開始根據給定的 /// 讀音鏈構築語句時,LMI 會接收來自組字器的讀音、輪流檢查自身是否有可以匹配到的 @@ -53,7 +44,7 @@ extension vChewing { /// /// LMI 會根據需要分別載入原廠語言模組和其他個別的子語言模組。LMI 本身不會記錄這些 /// 語言模組的相關資料的存放位置,僅藉由參數來讀取相關訊息。 - public class LMInstantiator: Megrez.LanguageModel { + public class LMInstantiator: LanguageModelProtocol { // 在函式內部用以記錄狀態的開關。 public var isPhraseReplacementEnabled = false public var isCNSEnabled = false @@ -79,6 +70,15 @@ extension vChewing { reverse: true, consolidate: false, defaultScore: -1.0, forceDefaultScore: false ) + // 簡體中文模式與繁體中文模式共用全字庫擴展模組,故靜態處理。 + // 不然,每個模式都會讀入一份全字庫,會多佔用 100MB 記憶體。 + static var lmCNS = vChewing.LMCoreNS( + reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false + ) + static var lmSymbols = vChewing.LMCoreNS( + reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false + ) + // 聲明使用者語言模組。 // 使用者語言模組使用多執行緒的話,可能會導致一些問題。有時間再仔細排查看看。 var lmUserPhrases = LMCoreEX( @@ -93,10 +93,7 @@ extension vChewing { var lmReplacements = LMReplacments() var lmAssociates = LMAssociates() - // 初期化的函式先保留 - override init() {} - - // 以下這些函式命名暫時保持原樣,等弒神行動徹底結束了再調整。 + // MARK: - 工具函式 public var isLanguageModelLoaded: Bool { lmCore.isLoaded() } public func loadLanguageModel(path: String) { @@ -108,11 +105,11 @@ extension vChewing { } } - public var isCNSDataLoaded: Bool { lmCNS.isLoaded() } + public var isCNSDataLoaded: Bool { vChewing.LMInstantiator.lmCNS.isLoaded() } public func loadCNSData(path: String) { if FileManager.default.isReadableFile(atPath: path) { - lmCNS.open(path) - IME.prtDebugIntel("lmCNS: \(lmCNS.count) entries of data loaded from: \(path)") + vChewing.LMInstantiator.lmCNS.open(path) + IME.prtDebugIntel("lmCNS: \(vChewing.LMInstantiator.lmCNS.count) entries of data loaded from: \(path)") } else { IME.prtDebugIntel("lmCNS: File access failure: \(path)") } @@ -128,11 +125,11 @@ extension vChewing { } } - public var isSymbolDataLoaded: Bool { lmSymbols.isLoaded() } + public var isSymbolDataLoaded: Bool { vChewing.LMInstantiator.lmSymbols.isLoaded() } public func loadSymbolData(path: String) { if FileManager.default.isReadableFile(atPath: path) { - lmSymbols.open(path) - IME.prtDebugIntel("lmSymbol: \(lmSymbols.count) entries of data loaded from: \(path)") + vChewing.LMInstantiator.lmSymbols.open(path) + IME.prtDebugIntel("lmSymbol: \(vChewing.LMInstantiator.lmSymbols.count) entries of data loaded from: \(path)") } else { IME.prtDebugIntel("lmSymbols: File access failure: \(path)") } @@ -185,7 +182,7 @@ extension vChewing { } } - // MARK: - Core Functions (Public) + // MARK: - 核心函式(對外) /// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。 // public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { } @@ -193,11 +190,11 @@ extension vChewing { /// 給定讀音字串,讓 LMI 給出對應的經過處理的單元圖陣列。 /// - Parameter key: 給定的讀音字串。 /// - Returns: 對應的經過處理的單元圖陣列。 - override open func unigramsFor(key: String) -> [Megrez.Unigram] { + public func unigramsFor(key: String) -> [Megrez.Unigram] { if key == " " { /// 給空格鍵指定輸出值。 let spaceUnigram = Megrez.Unigram( - keyValue: Megrez.KeyValuePair(key: " ", value: " "), + keyValue: Megrez.KeyValuePaired(key: " ", value: " "), score: 0 ) return [spaceUnigram] @@ -216,16 +213,16 @@ extension vChewing { rawAllUnigrams += lmCore.unigramsFor(key: key) if isCNSEnabled { - rawAllUnigrams += lmCNS.unigramsFor(key: key) + rawAllUnigrams += vChewing.LMInstantiator.lmCNS.unigramsFor(key: key) } if isSymbolEnabled { rawAllUnigrams += lmUserSymbols.unigramsFor(key: key) - rawAllUnigrams += lmSymbols.unigramsFor(key: key) + rawAllUnigrams += vChewing.LMInstantiator.lmSymbols.unigramsFor(key: key) } // 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。 - var filteredPairs: Set = [] + var filteredPairs: Set = [] // 載入要過濾的 KeyValuePair 清單。 for unigram in lmFiltered.unigramsFor(key: key) { @@ -238,9 +235,10 @@ extension vChewing { ) } - /// If the model has unigrams for the given key. - /// @param key The key. - override open func hasUnigramsFor(key: String) -> Bool { + /// 根據給定的索引鍵來確認各個資料庫陣列內是否存在對應的資料。 + /// - Parameter key: 索引鍵。 + /// - Returns: 是否在庫。 + public func hasUnigramsFor(key: String) -> Bool { if key == " " { return true } if !lmFiltered.hasUnigramsFor(key: key) { @@ -250,46 +248,40 @@ extension vChewing { return !unigramsFor(key: key).isEmpty } - public func associatedPhrasesForKey(_ key: String) -> [String] { + public func associatedPhrasesFor(key: String) -> [String] { lmAssociates.valuesFor(key: key) ?? [] } - public func hasAssociatedPhrasesForKey(_ key: String) -> Bool { + public func hasAssociatedPhrasesFor(key: String) -> Bool { lmAssociates.hasValuesFor(key: key) } - // MARK: - Core Functions (Private) + /// 該函式不起作用,僅用來滿足 LanguageModelProtocol 協定的要求。 + public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } + + // MARK: - 核心函式(對內) /// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。 /// - Parameters: - /// - unigrams: 傳入的單元圖原始結果陣列 - /// - filteredPairs: 傳入的要過濾掉的鍵值配對陣列 - /// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列 + /// - unigrams: 傳入的單元圖原始結果陣列。 + /// - filteredPairs: 傳入的要過濾掉的鍵值配對陣列。 + /// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列。 func filterAndTransform( unigrams: [Megrez.Unigram], - filter filteredPairs: Set + filter filteredPairs: Set ) -> [Megrez.Unigram] { var results: [Megrez.Unigram] = [] - var insertedPairs: Set = [] - + var insertedPairs: Set = [] for unigram in unigrams { - var pair: Megrez.KeyValuePair = unigram.keyValue - if filteredPairs.contains(pair) { - continue - } - + var pair: Megrez.KeyValuePaired = unigram.keyValue + if filteredPairs.contains(pair) { continue } if isPhraseReplacementEnabled { let replacement = lmReplacements.valuesFor(key: pair.value) - if !replacement.isEmpty { - IME.prtDebugIntel("\(pair.value) -> \(replacement)") - pair.value = replacement - } - } - - if !insertedPairs.contains(pair) { - results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) - insertedPairs.insert(pair) + if !replacement.isEmpty { pair.value = replacement } } + if insertedPairs.contains(pair) { continue } + results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) + insertedPairs.insert(pair) } return results } diff --git a/Source/Modules/LangModelRelated/SymbolNode.swift b/Source/Modules/LangModelRelated/LMSymbolNode.swift similarity index 83% rename from Source/Modules/LangModelRelated/SymbolNode.swift rename to Source/Modules/LangModelRelated/LMSymbolNode.swift index 7226b28e..7091a198 100644 --- a/Source/Modules/LangModelRelated/SymbolNode.swift +++ b/Source/Modules/LangModelRelated/LMSymbolNode.swift @@ -40,6 +40,34 @@ class SymbolNode { children = Array(symbols).map { SymbolNode(String($0), nil) } } + static func parseUserSymbolNodeData() { + let url = mgrLangModel.userSymbolNodeDataURL() + // 這兩個變數單獨拿出來,省得每次都重建還要浪費算力。 + var arrLines = [String.SubSequence]() + var fieldSlice = [Substring.SubSequence]() + var arrChildren = [SymbolNode]() + do { + arrLines = try String(contentsOfFile: url.path, encoding: .utf8).split(separator: "\n") + for strLine in arrLines.lazy.filter({ !$0.isEmpty }) { + fieldSlice = strLine.split(separator: "=") + switch fieldSlice.count { + case 1: arrChildren.append(.init(String(fieldSlice[0]))) + case 2: arrChildren.append(.init(String(fieldSlice[0]), symbols: .init(fieldSlice[1]))) + default: break + } + } + if arrChildren.isEmpty { + root = defaultSymbolRoot + } else { + root = .init("/", arrChildren) + } + } catch { + root = defaultSymbolRoot + } + } + + // MARK: - Static data. + static let catCommonSymbols = String( format: NSLocalizedString("catCommonSymbols", comment: "")) static let catHoriBrackets = String( @@ -71,7 +99,9 @@ class SymbolNode { static let catLineSegments = String( format: NSLocalizedString("catLineSegments", comment: "")) - static let root: SymbolNode = .init( + private(set) static var root: SymbolNode = .init("/") + + private static let defaultSymbolRoot: SymbolNode = .init( "/", [ SymbolNode("`"), diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 534d83a2..07170818 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -31,7 +31,7 @@ extension vChewing { /// 資料記錄原理與上游 C++ 的 ParselessLM 差不多,但用的是 Swift 原生手段。 /// 主要時間消耗仍在 For 迴圈,但這個算法可以顯著減少記憶體佔用。 @frozen public struct LMCoreEX { - /// 資料庫陣列。索引內容為注音字串,資料內容則為字串首尾範圍、方便自 strData 取資料。 + /// 資料庫辭典。索引內容為注音字串,資料內容則為字串首尾範圍、方便自 strData 取資料。 var rangeMap: [String: [Range]] = [:] /// 資料庫字串陣列。 var strData: String = "" @@ -66,12 +66,12 @@ extension vChewing { shouldForceDefaultScore = forceDefaultScore } - /// 檢測資料庫陣列內是否已經有載入的資料。 + /// 檢測資料庫辭典內是否已經有載入的資料。 public func isLoaded() -> Bool { !rangeMap.isEmpty } - /// 將資料從檔案讀入至資料庫陣列內。 + /// 將資料從檔案讀入至資料庫辭典內。 /// - parameters: /// - path: 給定路徑 @discardableResult public mutating func open(_ path: String) -> Bool { @@ -88,9 +88,9 @@ extension vChewing { strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") strData.ranges(splitBy: "\n").forEach { let neta = strData[$0].split(separator: " ") - if neta.count >= 2 { - let theKey = shouldReverse ? String(neta[1]) : String(neta[0]) - if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { + if neta.count >= 2, String(neta[0]).first != "#" { + if !neta[0].isEmpty, !neta[1].isEmpty { + let theKey = shouldReverse ? String(neta[1]) : String(neta[0]) let theValue = $0 rangeMap[theKey, default: []].append(theValue) } @@ -105,7 +105,7 @@ extension vChewing { return true } - /// 將當前語言模組的資料庫陣列自記憶體內卸除。 + /// 將當前語言模組的資料庫辭典自記憶體內卸除。 public mutating func close() { if isLoaded() { rangeMap.removeAll() @@ -114,7 +114,7 @@ extension vChewing { // MARK: - Advanced features - /// 將當前資料庫陣列的內容以文本的形式輸出至 macOS 內建的 Console.app。 + /// 將當前資料庫辭典的內容以文本的形式輸出至 macOS 內建的 Console.app。 /// /// 該功能僅作偵錯之用途。 public func dump() { @@ -130,7 +130,7 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫陣列內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成雙元圖陣列。 + /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成雙元圖陣列。 /// /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 /// - parameters: @@ -142,7 +142,7 @@ extension vChewing { precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() } - /// 根據給定的讀音索引鍵,來獲取資料庫陣列內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。 + /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵 public func unigramsFor(key: String) -> [Megrez.Unigram] { @@ -151,7 +151,7 @@ extension vChewing { for netaRange in arrRangeRecords { let neta = strData[netaRange].split(separator: " ") let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1]) - let kvPair = Megrez.KeyValuePair(key: key, value: theValue) + let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 3, !shouldForceDefaultScore { theScore = .init(String(neta[2])) ?? defaultScore @@ -165,7 +165,7 @@ extension vChewing { return grams } - /// 根據給定的讀音索引鍵來確認資料庫陣列內是否存在對應的資料。 + /// 根據給定的讀音索引鍵來確認資料庫辭典內是否存在對應的資料。 /// - parameters: /// - key: 讀音索引鍵 public func hasUnigramsFor(key: String) -> Bool { diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift index 926bf7fc..c1f93e7f 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift @@ -29,7 +29,7 @@ extension vChewing { /// 這樣一來可以節省在舊 mac 機種內的資料讀入速度。 /// 目前僅針對輸入法原廠語彙資料檔案使用 plist 格式。 @frozen public struct LMCoreNS { - /// 資料庫陣列。索引內容為經過加密的注音字串,資料內容則為 UTF8 資料陣列。 + /// 資料庫辭典。索引內容為經過加密的注音字串,資料內容則為 UTF8 資料陣列。 var rangeMap: [String: [Data]] = [:] /// 【已作廢】資料庫字串陣列。在 LMCoreNS 內沒有作用。 var strData: String = "" @@ -67,12 +67,12 @@ extension vChewing { shouldForceDefaultScore = forceDefaultScore } - /// 檢測資料庫陣列內是否已經有載入的資料。 + /// 檢測資料庫辭典內是否已經有載入的資料。 public func isLoaded() -> Bool { !rangeMap.isEmpty } - /// 將資料從檔案讀入至資料庫陣列內。 + /// 將資料從檔案讀入至資料庫辭典內。 /// - parameters: /// - path: 給定路徑 @discardableResult public mutating func open(_ path: String) -> Bool { @@ -93,7 +93,7 @@ extension vChewing { return true } - /// 將當前語言模組的資料庫陣列自記憶體內卸除。 + /// 將當前語言模組的資料庫辭典自記憶體內卸除。 public mutating func close() { if isLoaded() { rangeMap.removeAll() @@ -102,7 +102,7 @@ extension vChewing { // MARK: - Advanced features - /// 將當前資料庫陣列的內容以文本的形式輸出至 macOS 內建的 Console.app。 + /// 將當前資料庫辭典的內容以文本的形式輸出至 macOS 內建的 Console.app。 /// /// 該功能僅作偵錯之用途。 public func dump() { @@ -124,7 +124,7 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫陣列內的對應資料陣列的 UTF8 資料、就地分析、生成雙元圖陣列。 + /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成雙元圖陣列。 /// /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 /// - parameters: @@ -136,7 +136,7 @@ extension vChewing { precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() } - /// 根據給定的讀音索引鍵,來獲取資料庫陣列內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。 + /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵 public func unigramsFor(key: String) -> [Megrez.Unigram] { @@ -146,7 +146,7 @@ extension vChewing { let strNetaSet = String(decoding: netaSet, as: UTF8.self) let neta = Array(strNetaSet.split(separator: " ").reversed()) let theValue: String = .init(neta[0]) - let kvPair = Megrez.KeyValuePair(key: key, value: theValue) + let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 2, !shouldForceDefaultScore { theScore = .init(String(neta[1])) ?? defaultScore @@ -160,7 +160,7 @@ extension vChewing { return grams } - /// 根據給定的讀音索引鍵來確認資料庫陣列內是否存在對應的資料。 + /// 根據給定的讀音索引鍵來確認資料庫辭典內是否存在對應的資料。 /// - parameters: /// - key: 讀音索引鍵 public func hasUnigramsFor(key: String) -> Bool { diff --git a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift index 5a3a2da3..89adccea 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift @@ -1,5 +1,5 @@ // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -// Refactored from the ObjCpp-version of this class by Mengjuei Hsieh (MIT License). +// Refactored from the Cpp version of this class by Mengjuei Hsieh (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -27,38 +27,6 @@ import Foundation extension vChewing { public class LMUserOverride { - // MARK: - Private Structures - - // 這些型別必須得用 class,不然會導致拿不到有效建議。 - - class Override { - var count: Int = 0 - var timestamp: Double = 0.0 - } - - class Observation { - var count: Int = 0 - var overrides: [String: Override] = [:] - - func update(candidate: String, timestamp: Double) { - count += 1 - if let neta = overrides[candidate] { - neta.timestamp = timestamp - neta.count += 1 - overrides[candidate] = neta - } - } - } - - class KeyObservationPair { - var key: String - var observation: Observation - init(key: String, observation: Observation) { - self.key = key - self.observation = observation - } - } - // MARK: - Main var mutCapacity: Int @@ -73,50 +41,57 @@ extension vChewing { } public func observe( - walkedNodes: [Megrez.NodeAnchor], + walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, candidate: String, timestamp: Double ) { - let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) + let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) + guard !key.isEmpty else { return } guard mutLRUMap[key] != nil else { - let observation: Observation = .init() + var observation: Observation = .init() observation.update(candidate: candidate, timestamp: timestamp) let koPair = KeyObservationPair(key: key, observation: observation) + // 先移除 key 再設定 key 的話,就可以影響這個 key 在辭典內的順位。 + // Swift 原生的辭典是沒有數字索引排序的,但資料的插入順序卻有保存著。 + mutLRUMap.removeValue(forKey: key) mutLRUMap[key] = koPair mutLRUList.insert(koPair, at: 0) if mutLRUList.count > mutCapacity { - mutLRUMap[mutLRUList[mutLRUList.endIndex].key] = nil + mutLRUMap.removeValue(forKey: mutLRUList[mutLRUList.endIndex].key) mutLRUList.removeLast() } IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)") + mgrLangModel.saveUserOverrideModelData() return } - if let theNeta = mutLRUMap[key] { + if var theNeta = mutLRUMap[key] { theNeta.observation.update(candidate: candidate, timestamp: timestamp) mutLRUList.insert(theNeta, at: 0) mutLRUMap[key] = theNeta IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)") + mgrLangModel.saveUserOverrideModelData() } } public func suggest( - walkedNodes: [Megrez.NodeAnchor], + walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, timestamp: Double - ) -> String { - let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) + ) -> [Megrez.Unigram] { + let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) + let currentReadingKey = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, readingOnly: true) guard let koPair = mutLRUMap[key] else { IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).") - return "" + return .init() } let observation = koPair.observation - var candidate = "" - var score = 0.0 + var arrResults = [Megrez.Unigram]() + var currentHighScore = 0.0 for overrideNeta in Array(observation.overrides) { let override: Override = overrideNeta.value let overrideScore: Double = getScore( @@ -126,23 +101,20 @@ extension vChewing { timestamp: timestamp, lambda: mutDecayExponent ) - - if overrideScore == 0.0 { - continue - } - - if overrideScore > score { - candidate = overrideNeta.key - score = overrideScore - } + if (0...currentHighScore).contains(overrideScore) { continue } + let newUnigram = Megrez.Unigram( + keyValue: .init(key: currentReadingKey, value: overrideNeta.key), score: overrideScore + ) + arrResults.insert(newUnigram, at: 0) + currentHighScore = overrideScore } - if candidate.isEmpty { + if arrResults.isEmpty { IME.prtDebugIntel("UOM: No usable suggestions in the result for key: \(key).") } - return candidate + return arrResults } - public func getScore( + private func getScore( eventCount: Int, totalCount: Int, eventTimestamp: Double, @@ -156,50 +128,193 @@ extension vChewing { } func convertKeyFrom( - walkedNodes: [Megrez.NodeAnchor], cursorIndex: Int + walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false ) -> String { let arrEndingPunctuation = [",", "。", "!", "?", "」", "』", "”", "’"] - var arrNodesReversed: [Megrez.NodeAnchor] = [] + let whiteList = "你他妳她祢她它牠再在" + var arrNodes: [Megrez.NodeAnchor] = [] var intLength = 0 - for theNodeAnchor in walkedNodes { - // 這裡直接生成一個反向排序的陣列,之後就不用再「.reverse()」了。 - arrNodesReversed = [theNodeAnchor] + arrNodesReversed + for theNodeAnchor in walkedAnchors { + arrNodes.append(theNodeAnchor) intLength += theNodeAnchor.spanningLength if intLength >= cursorIndex { break } } - if arrNodesReversed.isEmpty { return "" } + if arrNodes.isEmpty { return "" } - var strCurrent = "()" - var strPrevious = "()" - var strAnterior = "()" + arrNodes = Array(arrNodes.reversed()) - guard let kvCurrent = arrNodesReversed[0].node?.currentKeyValue, + guard let kvCurrent = arrNodes[0].node?.currentKeyValue, !arrEndingPunctuation.contains(kvCurrent.value) else { return "" } + // 字音數與字數不一致的內容會被拋棄。 + if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" } + // 前置單元只記錄讀音,在其後的單元則同時記錄讀音與字詞 - strCurrent = kvCurrent.key - - if arrNodesReversed.count >= 2, - let kvPrevious = arrNodesReversed[1].node?.currentKeyValue, - !arrEndingPunctuation.contains(kvPrevious.value) - { - strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))" + let strCurrent = kvCurrent.key + var kvPrevious = Megrez.KeyValuePaired() + var kvAnterior = Megrez.KeyValuePaired() + var readingStack = "" + var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" } + var result: String { + // 不要把單個漢字的 kvCurrent 當前鍵值領頭的單元圖記入資料庫,不然對敲字體驗破壞太大。 + if readingStack.contains("_") + || (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value)) + { + return "" + } else { + return (readingOnly ? strCurrent : trigramKey) + } } - if arrNodesReversed.count >= 3, - let kvAnterior = arrNodesReversed[2].node?.currentKeyValue, - !arrEndingPunctuation.contains(kvAnterior.value) + if arrNodes.count >= 2, + let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue, + !arrEndingPunctuation.contains(kvPrevious.value), + kvPrevious.key.split(separator: "-").count == kvPrevious.value.count { - strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))" + kvPrevious = kvPreviousThisOne + readingStack = kvPrevious.key + readingStack } - return "(\(strAnterior),\(strPrevious),\(strCurrent))" + if arrNodes.count >= 3, + let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue, + !arrEndingPunctuation.contains(kvAnterior.value), + kvAnterior.key.split(separator: "-").count == kvAnterior.value.count + { + kvAnterior = kvAnteriorThisOne + readingStack = kvAnterior.key + readingStack + } + + return result + } + } +} + +// MARK: - Private Structures + +extension vChewing.LMUserOverride { + enum OverrideUnit: CodingKey { case count, timestamp } + enum ObservationUnit: CodingKey { case count, overrides } + enum KeyObservationPairUnit: CodingKey { case key, observation } + + struct Override: Hashable, Encodable, Decodable { + var count: Int = 0 + var timestamp: Double = 0.0 + static func == (lhs: Override, rhs: Override) -> Bool { + lhs.count == rhs.count && lhs.timestamp == rhs.timestamp + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: OverrideUnit.self) + try container.encode(timestamp, forKey: .timestamp) + try container.encode(count, forKey: .count) + } + + func hash(into hasher: inout Hasher) { + hasher.combine(count) + hasher.combine(timestamp) + } + } + + struct Observation: Hashable, Encodable, Decodable { + var count: Int = 0 + var overrides: [String: Override] = [:] + static func == (lhs: Observation, rhs: Observation) -> Bool { + lhs.count == rhs.count && lhs.overrides == rhs.overrides + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: ObservationUnit.self) + try container.encode(count, forKey: .count) + try container.encode(overrides, forKey: .overrides) + } + + func hash(into hasher: inout Hasher) { + hasher.combine(count) + hasher.combine(overrides) + } + + mutating func update(candidate: String, timestamp: Double) { + count += 1 + if overrides.keys.contains(candidate) { + overrides[candidate]?.timestamp = timestamp + overrides[candidate]?.count += 1 + } else { + overrides[candidate] = .init(count: 1, timestamp: timestamp) + } + } + } + + struct KeyObservationPair: Hashable, Encodable, Decodable { + var key: String + var observation: Observation + static func == (lhs: KeyObservationPair, rhs: KeyObservationPair) -> Bool { + lhs.key == rhs.key && lhs.observation == rhs.observation + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: KeyObservationPairUnit.self) + try container.encode(key, forKey: .key) + try container.encode(observation, forKey: .observation) + } + + func hash(into hasher: inout Hasher) { + hasher.combine(key) + hasher.combine(observation) + } + } +} + +// MARK: - Hash and Dehash the entire UOM data + +extension vChewing.LMUserOverride { + /// 自 LRU 辭典內移除所有的單元圖。 + public func bleachUnigrams() { + for key in mutLRUMap.keys { + if !key.contains("(),()") { continue } + mutLRUMap.removeValue(forKey: key) + } + resetMRUList() + mgrLangModel.saveUserOverrideModelData() + } + + internal func resetMRUList() { + mutLRUList.removeAll() + for neta in mutLRUMap.reversed() { + mutLRUList.append(neta.value) + } + } + + public func saveData(toURL fileURL: URL) { + let encoder = JSONEncoder() + do { + if let jsonData = try? encoder.encode(mutLRUMap) { + try jsonData.write(to: fileURL, options: .atomic) + } + } catch { + IME.prtDebugIntel("UOM Error: Unable to save data, abort saving. Details: \(error)") + return + } + } + + public func loadData(fromURL fileURL: URL) { + let decoder = JSONDecoder() + do { + let data = try Data(contentsOf: fileURL, options: .mappedIfSafe) + guard let jsonResult = try? decoder.decode([String: KeyObservationPair].self, from: data) else { + IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.") + return + } + mutLRUMap = jsonResult + resetMRUList() + } catch { + IME.prtDebugIntel("UOM Error: Unable to read file or parse the data, abort loading. Details: \(error)") + return } } } diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 08f74d5d..7afa7aa8 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -149,32 +149,37 @@ enum mgrLangModel { public static func loadUserPhrasesData() { gLangModelCHT.loadUserPhrasesData( - path: userPhrasesDataPath(InputMode.imeModeCHT), - filterPath: excludedPhrasesDataPath(InputMode.imeModeCHT) + path: userPhrasesDataURL(InputMode.imeModeCHT).path, + filterPath: userFilteredDataURL(InputMode.imeModeCHT).path ) gLangModelCHS.loadUserPhrasesData( - path: userPhrasesDataPath(InputMode.imeModeCHS), - filterPath: excludedPhrasesDataPath(InputMode.imeModeCHS) + path: userPhrasesDataURL(InputMode.imeModeCHS).path, + filterPath: userFilteredDataURL(InputMode.imeModeCHS).path ) - gLangModelCHT.loadUserSymbolData(path: userSymbolDataPath(InputMode.imeModeCHT)) - gLangModelCHS.loadUserSymbolData(path: userSymbolDataPath(InputMode.imeModeCHS)) + gLangModelCHT.loadUserSymbolData(path: userSymbolDataURL(InputMode.imeModeCHT).path) + gLangModelCHS.loadUserSymbolData(path: userSymbolDataURL(InputMode.imeModeCHS).path) + + gUserOverrideModelCHT.loadData(fromURL: userOverrideModelDataURL(InputMode.imeModeCHT)) + gUserOverrideModelCHS.loadData(fromURL: userOverrideModelDataURL(InputMode.imeModeCHS)) + + SymbolNode.parseUserSymbolNodeData() } public static func loadUserAssociatesData() { gLangModelCHT.loadUserAssociatesData( - path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHT) + path: mgrLangModel.userAssociatesDataURL(InputMode.imeModeCHT).path ) gLangModelCHS.loadUserAssociatesData( - path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHS) + path: mgrLangModel.userAssociatesDataURL(InputMode.imeModeCHS).path ) } public static func loadUserPhraseReplacement() { gLangModelCHT.loadReplacementsData( - path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHT) + path: mgrLangModel.userReplacementsDataURL(InputMode.imeModeCHT).path ) gLangModelCHS.loadReplacementsData( - path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHS) + path: mgrLangModel.userReplacementsDataURL(InputMode.imeModeCHS).path ) } @@ -219,37 +224,48 @@ enum mgrLangModel { // Swift 的 appendingPathComponent 需要藉由 URL 完成,最後再用 .path 轉為路徑。 - static func userPhrasesDataPath(_ mode: InputMode) -> String { + static func userPhrasesDataURL(_ mode: InputMode) -> URL { let fileName = (mode == InputMode.imeModeCHT) ? "userdata-cht.txt" : "userdata-chs.txt" - return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName).path + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } - static func userSymbolDataPath(_ mode: InputMode) -> String { + static func userSymbolDataURL(_ mode: InputMode) -> URL { let fileName = (mode == InputMode.imeModeCHT) ? "usersymbolphrases-cht.txt" : "usersymbolphrases-chs.txt" - return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName).path + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } - static func userAssociatedPhrasesDataPath(_ mode: InputMode) -> String { + static func userAssociatesDataURL(_ mode: InputMode) -> URL { let fileName = (mode == InputMode.imeModeCHT) ? "associatedPhrases-cht.txt" : "associatedPhrases-chs.txt" - return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName).path + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } - static func excludedPhrasesDataPath(_ mode: InputMode) -> String { + static func userFilteredDataURL(_ mode: InputMode) -> URL { let fileName = (mode == InputMode.imeModeCHT) ? "exclude-phrases-cht.txt" : "exclude-phrases-chs.txt" - return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName).path + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } - static func phraseReplacementDataPath(_ mode: InputMode) -> String { + static func userReplacementsDataURL(_ mode: InputMode) -> URL { let fileName = (mode == InputMode.imeModeCHT) ? "phrases-replacement-cht.txt" : "phrases-replacement-chs.txt" - return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName).path + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) + } + + static func userSymbolNodeDataURL() -> URL { + let fileName = "symbols.dat" + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) + } + + static func userOverrideModelDataURL(_ mode: InputMode) -> URL { + let fileName = (mode == InputMode.imeModeCHT) ? "override-model-data-cht.dat" : "override-model-data-chs.dat" + return URL(fileURLWithPath: dataFolderPath(isDefaultFolder: false)).appendingPathComponent(fileName) } // MARK: - 檢查具體的使用者語彙檔案是否存在 static func ensureFileExists( - _ filePath: String, populateWithTemplate templateBasename: String = "1145141919810", + _ fileURL: URL, populateWithTemplate templateBasename: String = "1145141919810", extension ext: String = "txt" ) -> Bool { + let filePath = fileURL.path if !FileManager.default.fileExists(atPath: filePath) { let templateURL = Bundle.main.url(forResource: templateBasename, withExtension: ext) var templateData = Data("".utf8) @@ -274,11 +290,14 @@ enum mgrLangModel { if !userDataFolderExists { return false } - if !ensureFileExists(userPhrasesDataPath(mode)) - || !ensureFileExists(userAssociatedPhrasesDataPath(mode)) - || !ensureFileExists(excludedPhrasesDataPath(mode)) - || !ensureFileExists(phraseReplacementDataPath(mode)) - || !ensureFileExists(userSymbolDataPath(mode)) + /// SymbolNode 資料與 UserOverrideModel 半衰模組資料檔案不需要強行確保存在。 + /// 前者的話,需要該檔案存在的人自己會建立。 + /// 後者的話,你在敲字時自己就會建立。 + if !ensureFileExists(userPhrasesDataURL(mode)) + || !ensureFileExists(userAssociatesDataURL(mode)) + || !ensureFileExists(userFilteredDataURL(mode)) + || !ensureFileExists(userReplacementsDataURL(mode)) + || !ensureFileExists(userSymbolDataURL(mode)) { return false } @@ -391,7 +410,7 @@ enum mgrLangModel { return false } - let path = areWeDeleting ? excludedPhrasesDataPath(mode) : userPhrasesDataPath(mode) + let theURL = areWeDeleting ? userFilteredDataURL(mode) : userPhrasesDataURL(mode) if areWeDuplicating, !areWeDeleting { // Do not use ASCII characters to comment here. @@ -400,7 +419,7 @@ enum mgrLangModel { currentMarkedPhrase += "\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎" } - if let writeFile = FileHandle(forUpdatingAtPath: path), + if let writeFile = FileHandle(forUpdatingAtPath: theURL.path), let data = currentMarkedPhrase.data(using: .utf8), let endl = "\n".data(using: .utf8) { @@ -415,7 +434,7 @@ enum mgrLangModel { // We enforce the format consolidation here, since the pragma header // will let the UserPhraseLM bypasses the consolidating process on load. - if !vChewing.LMConsolidator.consolidate(path: path, pragma: false) { + if !vChewing.LMConsolidator.consolidate(path: theURL.path, pragma: false) { return false } @@ -428,4 +447,20 @@ enum mgrLangModel { } return false } + + static func saveUserOverrideModelData() { + gUserOverrideModelCHT.saveData(toURL: userOverrideModelDataURL(InputMode.imeModeCHT)) + gUserOverrideModelCHS.saveData(toURL: userOverrideModelDataURL(InputMode.imeModeCHS)) + } + + static func removeUnigramsFromUserOverrideModel(_ mode: InputMode) { + switch mode { + case .imeModeCHS: + gUserOverrideModelCHT.bleachUnigrams() + case .imeModeCHT: + gUserOverrideModelCHS.bleachUnigrams() + case .imeModeNULL: + break + } + } } diff --git a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift index 3ba9a524..72a3284b 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift @@ -35,9 +35,9 @@ extension Megrez { /// 該組字器的軌格。 private var mutGrid: Grid = .init() /// 該組字器所使用的語言模型。 - private var mutLM: LanguageModel + private var mutLM: LanguageModelProtocol - /// 公開該組字器內可以允許的最大詞長。 + /// 公開:該組字器內可以允許的最大詞長。 public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength } /// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。 public var joinSeparator: String = "" @@ -48,7 +48,7 @@ extension Megrez { } /// 公開:該組字器是否為空。 - public var isEmpty: Bool { grid.isEmpty } + public var isEmpty: Bool { mutGrid.isEmpty } /// 公開:該組字器的軌格(唯讀)。 public var grid: Grid { mutGrid } @@ -62,7 +62,7 @@ extension Megrez { /// - lm: 語言模型。可以是任何基於 Megrez.LanguageModel 的衍生型別。 /// - length: 指定該組字器內可以允許的最大詞長,預設為 10 字。 /// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為空。 - public init(lm: LanguageModel, length: Int = 10, separator: String = "") { + public init(lm: LanguageModelProtocol, length: Int = 10, separator: String = "") { mutLM = lm mutGrid = .init(spanLength: abs(length)) // 防呆 joinSeparator = separator @@ -112,7 +112,7 @@ extension Megrez { return true } - /// 移除該組字器的第一個讀音單元。 + /// 移除該組字器最先被輸入的第 X 個讀音單元。 /// /// 用於輸入法組字區長度上限處理: /// 將該位置要溢出的敲字內容遞交之後、再執行這個函式。 @@ -140,8 +140,8 @@ extension Megrez { /// 對已給定的軌格按照給定的位置與條件進行正向爬軌。 /// - Parameters: - /// - at: 開始爬軌的位置。 - /// - score: 給定累計權重,非必填參數。預設值為 0。 + /// - location: 開始爬軌的位置。 + /// - accumulatedScore: 給定累計權重,非必填參數。預設值為 0。 /// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。 /// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。 public func walk( @@ -160,8 +160,8 @@ extension Megrez { /// 對已給定的軌格按照給定的位置與條件進行反向爬軌。 /// - Parameters: - /// - at: 開始爬軌的位置。 - /// - score: 給定累計權重,非必填參數。預設值為 0。 + /// - location: 開始爬軌的位置。 + /// - accumulatedScore: 給定累計權重,非必填參數。預設值為 0。 /// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。 /// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。 public func reverseWalk( @@ -219,11 +219,9 @@ extension Megrez { } else { // 看看當前格位有沒有更長的候選字詞。 var longPhrases = [String]() - for theAnchor in nodes { + for theAnchor in nodes.lazy.filter({ $0.spanningLength > 1 }) { guard let theNode = theAnchor.node else { continue } - if theAnchor.spanningLength > 1 { - longPhrases.append(theNode.currentKeyValue.value) - } + longPhrases.append(theNode.currentKeyValue.value) } longPhrases = longPhrases.stableSorted { @@ -249,10 +247,10 @@ extension Megrez { } var result: [NodeAnchor] = paths[0] - for neta in paths { - if neta.last!.accumulatedScore > result.last!.accumulatedScore { - result = neta - } + for neta in paths.lazy.filter({ + $0.last!.accumulatedScore > result.last!.accumulatedScore + }) { + result = neta } return result @@ -267,29 +265,20 @@ extension Megrez { for p in itrBegin.. itrEnd { - break - } + if p + q > itrEnd { break } let arrSlice = mutReadings[p..<(p + q)] let combinedReading: String = join(slice: arrSlice, separator: joinSeparator) - - if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { - let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) - if !unigrams.isEmpty { - let n = Node(key: combinedReading, unigrams: unigrams) - mutGrid.insertNode(node: n, location: p, spanningLength: q) - } - } + if mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { continue } + let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) + if unigrams.isEmpty { continue } + let n = Node(key: combinedReading, unigrams: unigrams) + mutGrid.insertNode(node: n, location: p, spanningLength: q) } } } private func join(slice arrSlice: ArraySlice, separator: String) -> String { - var arrResult: [String] = [] - for value in arrSlice { - arrResult.append(value) - } - return arrResult.joined(separator: separator) + arrSlice.joined(separator: separator) } } } @@ -303,7 +292,7 @@ extension Sequence { /// /// - Parameter areInIncreasingOrder: Return nil when two element are equal. /// - Returns: The sorted collection. - func stableSorted( + fileprivate func stableSorted( by areInIncreasingOrder: (Element, Element) throws -> Bool ) rethrows -> [Element] diff --git a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift index 8b1b82b8..fcb7697a 100644 --- a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift +++ b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift @@ -32,15 +32,16 @@ extension Megrez { /// 該幅位內可以允許的最大詞長。 private var mutMaxBuildSpanLength = 10 - /// 公開:該幅位內可以允許的最大詞長。 + /// 公開:該軌格內可以允許的最大幅位長度。 public var maxBuildSpanLength: Int { mutMaxBuildSpanLength } - /// 軌格的寬度,也就是其內的幅位陣列當中的幅位數量。 - var width: Int { mutSpans.count } + /// 公開:軌格的寬度,也就是其內的幅位陣列當中的幅位數量。 + public var width: Int { mutSpans.count } - /// 軌格是否為空。 - var isEmpty: Bool { mutSpans.isEmpty } + /// 公開:軌格是否為空。 + public var isEmpty: Bool { mutSpans.isEmpty } + /// 初期化轨格。 public init(spanLength: Int = 10) { mutMaxBuildSpanLength = spanLength mutSpans = [Megrez.Span]() @@ -90,11 +91,10 @@ extension Megrez { public func expandGridByOneAt(location: Int) { let location = abs(location) // 防呆 mutSpans.insert(Span(), at: location) - if location != 0, location != mutSpans.count { - for i in 0.. [NodeAnchor] { let location = abs(location) // 防呆 var results = [NodeAnchor]() - if location < mutSpans.count { // 此時 mutSpans 必然不為空 - let span = mutSpans[location] - for i in 1...maxBuildSpanLength { - if let np = span.node(length: i) { - results.append( - NodeAnchor( - node: np, - location: location, - spanningLength: i - ) + if location >= mutSpans.count { return results } + // 此時 mutSpans 必然不為空,因為 location 不可能小於 0。 + let span = mutSpans[location] + for i in 1...maxBuildSpanLength { + if let np = span.node(length: i) { + results.append( + .init( + node: np, + location: location, + spanningLength: i ) - } + ) } } return results @@ -143,20 +143,18 @@ extension Megrez { public func nodesEndingAt(location: Int) -> [NodeAnchor] { let location = abs(location) // 防呆 var results = [NodeAnchor]() - if !mutSpans.isEmpty, location <= mutSpans.count { - for i in 0..= location { - if let np = span.node(length: location - i) { - results.append( - NodeAnchor( - node: np, - location: i, - spanningLength: location - i - ) - ) - } - } + if mutSpans.isEmpty || location > mutSpans.count { return results } + for i in 0.. [NodeAnchor] { let location = abs(location) // 防呆 var results = [NodeAnchor]() - if !mutSpans.isEmpty, location <= mutSpans.count { - for i in 0..= location { - for j in 1...span.maximumLength { - if i + j < location { - continue - } - if let np = span.node(length: j) { - results.append( - NodeAnchor( - node: np, - location: i, - spanningLength: location - i - ) - ) - } - } + if mutSpans.isEmpty || location > mutSpans.count { return results } + for i in 0.. Node? { - mutLengthNodeMap[abs(length)] // 防呆 + // 防呆 Abs() + mutLengthNodeMap.keys.contains(abs(length)) ? mutLengthNodeMap[abs(length)] : nil } } } diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift index 4f86ad46..1672b2b9 100644 --- a/Source/Modules/LanguageParsers/Megrez/4_Node.swift +++ b/Source/Modules/LanguageParsers/Megrez/4_Node.swift @@ -26,8 +26,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extension Megrez { /// 節點。 public class Node { - /// 當前節點對應的語言模型。 - private let mutLM: LanguageModel = .init() /// 鍵。 private var mutKey: String = "" /// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。 @@ -37,11 +35,11 @@ extension Megrez { /// 雙元圖陣列。 private var mutBigrams: [Bigram] /// 候選字詞陣列,以鍵值陣列的形式存在。 - private var mutCandidates: [KeyValuePair] = [] + private var mutCandidates: [KeyValuePaired] = [] /// 專門「用單元圖資料值來調查索引值」的辭典。 private var mutValueUnigramIndexMap: [String: Int] = [:] /// 專門「用給定鍵值來取對應的雙元圖陣列」的辭典。 - private var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]] = [:] + private var mutPrecedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:] /// 狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態。 private var mutCandidateFixed: Bool = false /// 用來登記「當前選中的單元圖」的索引值的變數。 @@ -54,21 +52,21 @@ extension Megrez { } /// 公開:候選字詞陣列(唯讀),以鍵值陣列的形式存在。 - var candidates: [KeyValuePair] { mutCandidates } + public var candidates: [KeyValuePaired] { mutCandidates } /// 公開:用來登記「當前選中的單元圖」的索引值的變數(唯讀)。 - var isCandidateFixed: Bool { mutCandidateFixed } + public var isCandidateFixed: Bool { mutCandidateFixed } /// 公開:鍵(唯讀)。 - var key: String { mutKey } + public var key: String { mutKey } /// 公開:當前節點的當前被選中的候選字詞「在該節點內的」目前的權重(唯讀)。 - var score: Double { mutScore } + public var score: Double { mutScore } /// 公開:當前被選中的候選字詞的鍵值配對。 - var currentKeyValue: KeyValuePair { - mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex] + public var currentKeyValue: KeyValuePaired { + mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePaired() : mutCandidates[mutSelectedUnigramIndex] } /// 公開:給出當前單元圖陣列內最高的權重數值。 - var highestUnigramScore: Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score } + public var highestUnigramScore: Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score } /// 初期化一個節點。 /// - Parameters: @@ -93,7 +91,9 @@ extension Megrez { mutCandidates.append(gram.keyValue) } - for gram in bigrams { + for gram in bigrams.lazy.filter({ [self] in + mutPrecedingBigramMap.keys.contains($0.precedingKeyValue) + }) { mutPrecedingBigramMap[gram.precedingKeyValue]?.append(gram) } } @@ -101,19 +101,18 @@ extension Megrez { /// 對擁有「給定的前述鍵值陣列」的節點提權。 /// - Parameters: /// - precedingKeyValues: 前述鍵值陣列。 - public func primeNodeWith(precedingKeyValues: [KeyValuePair]) { + public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) { var newIndex = mutSelectedUnigramIndex var max = mutScore if !isCandidateFixed { for neta in precedingKeyValues { let bigrams = mutPrecedingBigramMap[neta] ?? [] - for bigram in bigrams { - guard bigram.score > max else { continue } - if let valRetrieved = mutValueUnigramIndexMap[bigram.keyValue.value] { - newIndex = valRetrieved as Int - max = bigram.score - } + for bigram in bigrams.lazy.filter({ [self] in + $0.score > max && mutValueUnigramIndexMap.keys.contains($0.keyValue.value) + }) { + newIndex = mutValueUnigramIndexMap[bigram.keyValue.value] ?? newIndex + max = bigram.score } } } @@ -156,10 +155,8 @@ extension Megrez { /// - Parameters: /// - candidate: 給定的候選字詞字串。 public func scoreFor(candidate: String) -> Double { - for unigram in mutUnigrams { - if unigram.keyValue.value == candidate { - return unigram.score - } + for unigram in mutUnigrams.lazy.filter({ $0.keyValue.value == candidate }) { + return unigram.score } return 0.0 } diff --git a/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift index d585aba7..abe8c822 100644 --- a/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift +++ b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift @@ -23,24 +23,35 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +public protocol LanguageModelProtocol { + /// 給定鍵,讓語言模型找給一組單元圖陣列。 + func unigramsFor(key: String) -> [Megrez.Unigram] + + /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 + func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] + + /// 給定鍵,確認是否有單元圖記錄在庫。 + func hasUnigramsFor(key: String) -> Bool +} + extension Megrez { /// 語言模型框架,回頭實際使用時需要派生一個型別、且重寫相關函式。 - open class LanguageModel { + open class LanguageModel: LanguageModelProtocol { public init() {} // 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函式的參數設計。 - /// 給定鍵,讓語言模型找給一筆單元圖。 + /// 給定鍵,讓語言模型找給一組單元圖陣列。 open func unigramsFor(key: String) -> [Megrez.Unigram] { key.isEmpty ? [Megrez.Unigram]() : [Megrez.Unigram]() } - /// 給定當前鍵與前述鍵,讓語言模型找給一筆雙元圖。 + /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() } - /// 給定鍵, + /// 給定鍵,確認是否有單元圖記錄在庫。 open func hasUnigramsFor(key: String) -> Bool { key.count != 0 } diff --git a/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift index cca1069f..b64e2658 100644 --- a/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift +++ b/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift @@ -27,9 +27,9 @@ extension Megrez { /// 雙元圖。 @frozen public struct Bigram: Equatable, CustomStringConvertible { /// 當前鍵值。 - public var keyValue: KeyValuePair + public var keyValue: KeyValuePaired /// 前述鍵值。 - public var precedingKeyValue: KeyValuePair + public var precedingKeyValue: KeyValuePaired /// 權重。 public var score: Double /// 將當前雙元圖列印成一個字串。 @@ -42,7 +42,7 @@ extension Megrez { /// - precedingKeyValue: 前述鍵值。 /// - keyValue: 當前鍵值。 /// - score: 權重(雙精度小數)。 - public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) { + public init(precedingKeyValue: KeyValuePaired, keyValue: KeyValuePaired, score: Double) { self.keyValue = keyValue self.precedingKeyValue = precedingKeyValue self.score = score diff --git a/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift index bced45ad..4bcd894e 100644 --- a/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift +++ b/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift @@ -27,7 +27,7 @@ extension Megrez { /// 單元圖。 @frozen public struct Unigram: Equatable, CustomStringConvertible { /// 鍵值。 - public var keyValue: KeyValuePair + public var keyValue: KeyValuePaired /// 權重。 public var score: Double /// 將當前單元圖列印成一個字串。 @@ -39,7 +39,7 @@ extension Megrez { /// - Parameters: /// - keyValue: 鍵值。 /// - score: 權重(雙精度小數)。 - public init(keyValue: KeyValuePair, score: Double) { + public init(keyValue: KeyValuePaired, score: Double) { self.keyValue = keyValue self.score = score } @@ -49,11 +49,6 @@ extension Megrez { hasher.combine(score) } - // 這個函式不再需要了。 - public static func compareScore(a: Unigram, b: Unigram) -> Bool { - a.score > b.score - } - public static func == (lhs: Unigram, rhs: Unigram) -> Bool { lhs.keyValue == rhs.keyValue && lhs.score == rhs.score } diff --git a/Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift b/Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift similarity index 72% rename from Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift rename to Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift index 851ab3df..3e9dee80 100644 --- a/Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift +++ b/Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift @@ -25,17 +25,19 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extension Megrez { /// 鍵值配對。 - @frozen public struct KeyValuePair: Equatable, Hashable, Comparable, CustomStringConvertible { + @frozen public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible { /// 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 public var key: String /// 資料值。 public var value: String /// 將當前鍵值列印成一個字串。 - public var description: String { - "(" + key + "," + value + ")" - } + public var description: String { "(" + key + "," + value + ")" } + /// 判斷當前鍵值配對是否合規。如果鍵與值有任一為空,則結果為 false。 + public var isValid: Bool { !key.isEmpty && !value.isEmpty } + /// 將當前鍵值列印成一個字串,但如果該鍵值配對為空的話則僅列印「()」。 + public var toNGramKey: String { !isValid ? "()" : "(" + key + "," + value + ")" } - /// 初期化一組鍵值配對 + /// 初期化一組鍵值配對。 /// - Parameters: /// - key: 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 /// - value: 資料值。 @@ -49,23 +51,23 @@ extension Megrez { hasher.combine(value) } - public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { lhs.key.count == rhs.key.count && lhs.value == rhs.value } - public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value) } - public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value) } - public static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { (lhs.key.count <= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value) } - public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool { + public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { (lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value) } } diff --git a/Source/Modules/SFX/clsSFX.swift b/Source/Modules/SFX/clsSFX.swift index 6a6b4e04..d3b211fa 100644 --- a/Source/Modules/SFX/clsSFX.swift +++ b/Source/Modules/SFX/clsSFX.swift @@ -25,7 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import AVFoundation import Foundation -public class clsSFX { +public enum clsSFX { static func beep() { let filePath = Bundle.main.path(forResource: mgrPrefs.shouldNotFartInLieuOfBeep ? "Beep" : "Fart", ofType: "m4a")! let fileURL = URL(fileURLWithPath: filePath) @@ -33,4 +33,15 @@ public class clsSFX { AudioServicesCreateSystemSoundID(fileURL as CFURL, &soundID) AudioServicesPlaySystemSound(soundID) } + + static func beep(count: Int = 1) { + if count <= 1 { + clsSFX.beep() + return + } + for _ in 0...count { + clsSFX.beep() + usleep(500_000) + } + } } diff --git a/Source/Resources/Base.lproj/Localizable.strings b/Source/Resources/Base.lproj/Localizable.strings index 9fc16468..1ea7dee5 100644 --- a/Source/Resources/Base.lproj/Localizable.strings +++ b/Source/Resources/Base.lproj/Localizable.strings @@ -62,6 +62,7 @@ "Loading CHS Core Dict..." = "Loading CHS Core Dict..."; "Loading CHT Core Dict..." = "Loading CHT Core Dict..."; "Core Dict loading complete." = "Core Dict loading complete."; +"Optimize Memorized Phrases" = "Optimize Memorized Phrases"; // The followings are the category names used in the Symbol menu. "catCommonSymbols" = "CommonSymbols"; @@ -88,6 +89,7 @@ "Apple Chewing - Dachen" = "Apple Chewing - Dachen"; "Apple Chewing - Eten Traditional" = "Apple Chewing - Eten Traditional"; "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional." = "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional."; +"Applying typing suggestions from half-life user override model" = "Applying typing suggestions from half-life user override model"; "at the rear of the phrase (like Microsoft New Phonetic)" = "at the rear of the phrase (like Microsoft New Phonetic)"; "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters" = "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters"; "Auto-convert traditional Chinese glyphs to KangXi characters" = "Auto-convert traditional Chinese glyphs to KangXi characters"; @@ -113,7 +115,7 @@ "Debug Mode" = "Debug Mode"; "Dictionary" = "Dictionary"; "Emulating select-candidate-per-character mode" = "Emulating select-candidate-per-character mode"; -"Enable CNS11643 Support (2022-04-27)" = "Enable CNS11643 Support (2022-04-27)"; +"Enable CNS11643 Support (2022-06-15)" = "Enable CNS11643 Support (2022-06-15)"; "Enable Space key for calling candidate window" = "Enable Space key for calling candidate window"; "Enable symbol input support (incl. certain emoji symbols)" = "Enable symbol input support (incl. certain emoji symbols)"; "English" = "English"; diff --git a/Source/Resources/en.lproj/Localizable.strings b/Source/Resources/en.lproj/Localizable.strings index 9fc16468..1ea7dee5 100644 --- a/Source/Resources/en.lproj/Localizable.strings +++ b/Source/Resources/en.lproj/Localizable.strings @@ -62,6 +62,7 @@ "Loading CHS Core Dict..." = "Loading CHS Core Dict..."; "Loading CHT Core Dict..." = "Loading CHT Core Dict..."; "Core Dict loading complete." = "Core Dict loading complete."; +"Optimize Memorized Phrases" = "Optimize Memorized Phrases"; // The followings are the category names used in the Symbol menu. "catCommonSymbols" = "CommonSymbols"; @@ -88,6 +89,7 @@ "Apple Chewing - Dachen" = "Apple Chewing - Dachen"; "Apple Chewing - Eten Traditional" = "Apple Chewing - Eten Traditional"; "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional." = "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional."; +"Applying typing suggestions from half-life user override model" = "Applying typing suggestions from half-life user override model"; "at the rear of the phrase (like Microsoft New Phonetic)" = "at the rear of the phrase (like Microsoft New Phonetic)"; "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters" = "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters"; "Auto-convert traditional Chinese glyphs to KangXi characters" = "Auto-convert traditional Chinese glyphs to KangXi characters"; @@ -113,7 +115,7 @@ "Debug Mode" = "Debug Mode"; "Dictionary" = "Dictionary"; "Emulating select-candidate-per-character mode" = "Emulating select-candidate-per-character mode"; -"Enable CNS11643 Support (2022-04-27)" = "Enable CNS11643 Support (2022-04-27)"; +"Enable CNS11643 Support (2022-06-15)" = "Enable CNS11643 Support (2022-06-15)"; "Enable Space key for calling candidate window" = "Enable Space key for calling candidate window"; "Enable symbol input support (incl. certain emoji symbols)" = "Enable symbol input support (incl. certain emoji symbols)"; "English" = "English"; diff --git a/Source/Resources/ja.lproj/Localizable.strings b/Source/Resources/ja.lproj/Localizable.strings index 0a2fd83c..97071713 100644 --- a/Source/Resources/ja.lproj/Localizable.strings +++ b/Source/Resources/ja.lproj/Localizable.strings @@ -62,6 +62,7 @@ "Loading CHS Core Dict..." = "簡体中国語核心辞書読込中…"; "Loading CHT Core Dict..." = "繁体中国語核心辞書読込中…"; "Core Dict loading complete." = "核心辞書読込完了"; +"Optimize Memorized Phrases" = "臨時記憶資料を整う"; // The followings are the category names used in the Symbol menu. "catCommonSymbols" = "常用"; @@ -88,6 +89,7 @@ "Apple Chewing - Dachen" = "Apple 大千注音キーボード"; "Apple Chewing - Eten Traditional" = "Apple 倚天傳統キーボード"; "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional." = "Apple 動態注音キーボード (大千と倚天伝統) を使うには、共通語分析器の配列を大千と設定すべきである。"; +"Applying typing suggestions from half-life user override model" = "入力中で臨時記憶モジュールからお薦めの候補を自動的に選ぶ"; "at the rear of the phrase (like Microsoft New Phonetic)" = "単語の後で // Microsoft 新注音入力のやり方"; "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters" = "入力した繁体字を日文 JIS 新字体と自動変換"; "Auto-convert traditional Chinese glyphs to KangXi characters" = "入力した繁体字を康熙字体と自動変換"; @@ -113,7 +115,7 @@ "Debug Mode" = "欠陥辿着モード"; "Dictionary" = "辞書設定"; "Emulating select-candidate-per-character mode" = "漢字1つづつ全候補選択入力モード"; -"Enable CNS11643 Support (2022-04-27)" = "全字庫モード // 入力可能な漢字数を倍増す (2022-04-27)"; +"Enable CNS11643 Support (2022-06-15)" = "全字庫モード // 入力可能な漢字数を倍増す (2022-06-15)"; "Enable Space key for calling candidate window" = "Space キーで入力候補を呼び出す"; "Enable symbol input support (incl. certain emoji symbols)" = "僅かなる絵文字も含む符号入力サポートを起用"; "English" = "英語"; diff --git a/Source/Resources/zh-Hans.lproj/Localizable.strings b/Source/Resources/zh-Hans.lproj/Localizable.strings index d514925c..7b92f19e 100644 --- a/Source/Resources/zh-Hans.lproj/Localizable.strings +++ b/Source/Resources/zh-Hans.lproj/Localizable.strings @@ -62,6 +62,7 @@ "Loading CHS Core Dict..." = "载入简体中文核心辞典…"; "Loading CHT Core Dict..." = "载入繁体中文核心辞典…"; "Core Dict loading complete." = "核心辞典载入完毕"; +"Optimize Memorized Phrases" = "精简临时记忆语汇资料"; // The followings are the category names used in the Symbol menu. "catCommonSymbols" = "常用"; @@ -88,6 +89,7 @@ "Apple Chewing - Dachen" = "Apple 大千注音键盘排列"; "Apple Chewing - Eten Traditional" = "Apple 倚天传统键盘排列"; "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional." = "Apple 动态注音键盘布局(大千与倚天)要求普通话/国音分析器得配置为大千排列。"; +"Applying typing suggestions from half-life user override model" = "在敲字时自动套用来自半衰记忆模组的建议"; "at the rear of the phrase (like Microsoft New Phonetic)" = "将游标置于词语后方 // Windows 微软新注音风格"; "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters" = "自动将繁体中文字转为日文 JIS 新字体"; "Auto-convert traditional Chinese glyphs to KangXi characters" = "自动将繁体中文字转为康熙正体字"; @@ -113,7 +115,7 @@ "Debug Mode" = "侦错模式"; "Dictionary" = "辞典"; "Emulating select-candidate-per-character mode" = "模拟 90 年代前期注音逐字选字输入风格"; -"Enable CNS11643 Support (2022-04-27)" = "启用 CNS11643 全字库支援 (2022-04-27)"; +"Enable CNS11643 Support (2022-06-15)" = "启用 CNS11643 全字库支援 (2022-06-15)"; "Enable Space key for calling candidate window" = "敲空格键以呼出候选字窗"; "Enable symbol input support (incl. certain emoji symbols)" = "启用包括少许绘文字在内的符号输入支援"; "English" = "英语"; diff --git a/Source/Resources/zh-Hant.lproj/Localizable.strings b/Source/Resources/zh-Hant.lproj/Localizable.strings index 95f5f1d9..b286d1f4 100644 --- a/Source/Resources/zh-Hant.lproj/Localizable.strings +++ b/Source/Resources/zh-Hant.lproj/Localizable.strings @@ -62,6 +62,7 @@ "Loading CHS Core Dict..." = "載入簡體中文核心辭典…"; "Loading CHT Core Dict..." = "載入繁體中文核心辭典…"; "Core Dict loading complete." = "核心辭典載入完畢"; +"Optimize Memorized Phrases" = "精簡臨時記憶語彙資料"; // The followings are the category names used in the Symbol menu. "catCommonSymbols" = "常用"; @@ -88,6 +89,7 @@ "Apple Chewing - Dachen" = "Apple 大千注音鍵盤佈局"; "Apple Chewing - Eten Traditional" = "Apple 倚天傳統鍵盤佈局"; "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional." = "Apple 動態注音鍵盤佈局(大千與倚天)要求普通話/國音分析器得配置為大千排列。"; +"Applying typing suggestions from half-life user override model" = "在敲字時自動套用來自半衰記憶模組的建議"; "at the rear of the phrase (like Microsoft New Phonetic)" = "將游標置於詞語後方 // Windows 微軟新注音風格"; "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters" = "自動將繁體中文字轉為日文 JIS 新字體"; "Auto-convert traditional Chinese glyphs to KangXi characters" = "自動將繁體中文字轉為康熙正體字"; @@ -113,7 +115,7 @@ "Debug Mode" = "偵錯模式"; "Dictionary" = "辭典"; "Emulating select-candidate-per-character mode" = "模擬 90 年代前期注音逐字選字輸入風格"; -"Enable CNS11643 Support (2022-04-27)" = "啟用 CNS11643 全字庫支援 (2022-04-27)"; +"Enable CNS11643 Support (2022-06-15)" = "啟用 CNS11643 全字庫支援 (2022-06-15)"; "Enable Space key for calling candidate window" = "敲空格鍵以呼出候選字窗"; "Enable symbol input support (incl. certain emoji symbols)" = "啟用包括少許繪文字在內的符號輸入支援"; "English" = "英語"; diff --git a/Source/UI/CandidateUI/ctlCandidateUniversal.swift b/Source/UI/CandidateUI/ctlCandidateUniversal.swift index c421f8d3..967bf988 100644 --- a/Source/UI/CandidateUI/ctlCandidateUniversal.swift +++ b/Source/UI/CandidateUI/ctlCandidateUniversal.swift @@ -74,6 +74,11 @@ private class vwrCandidateUniversal: NSView { @objc(setKeyLabels:displayedCandidates:) func set(keyLabels labels: [String], displayedCandidates candidates: [String]) { + let candidates = candidates.map { theCandidate -> String in + let theConverted = IME.kanjiConversionIfRequired(theCandidate) + return (theCandidate == theConverted) ? theCandidate : "\(theConverted)(\(theCandidate))" + } + let count = min(labels.count, candidates.count) keyLabels = Array(labels[0.. - @@ -681,9 +680,9 @@ + - - + + + - - + - - - + + - + - + + - - - + + + + + + diff --git a/Source/WindowNIBs/en.lproj/frmPrefWindow.strings b/Source/WindowNIBs/en.lproj/frmPrefWindow.strings index 0433aa98..93d74f20 100644 --- a/Source/WindowNIBs/en.lproj/frmPrefWindow.strings +++ b/Source/WindowNIBs/en.lproj/frmPrefWindow.strings @@ -41,6 +41,7 @@ "BSK-bH-Gct.title" = "Auto-convert traditional Chinese glyphs to KangXi characters"; "cf2-se-PDO.title" = "Dictionary and Language Models"; "chkAllowBoostingSingleKanjiAsUserPhrase.title" = "Allow boosting / excluding a candidate of single kanji"; +"chkFetchSuggestionsFromUserOverrideModel.title" = "Applying typing suggestions from half-life user override model"; "dIN-TZ-67g.title" = "Space to +cycle candidates, Shift+Space to +cycle pages"; "E1l-m8-xgb.title" = "Advanced Settings"; "eia-1F-Do0.title" = "Auto-convert traditional Chinese glyphs to JIS Shinjitai characters"; @@ -71,7 +72,7 @@ "TXr-FF-ehw.title" = "Traditional Chinese"; "ueU-Rz-a1C.title" = "Choose the behavior of (Shift+)Tab key in the candidate window."; "Uyz-xL-TVN.title" = "Output Settings"; -"W24-T4-cg0.title" = "Enable CNS11643 Support (2022-04-27)"; +"W24-T4-cg0.title" = "Enable CNS11643 Support (2022-06-15)"; "wFR-zX-M8H.title" = "Show Hanyu-Pinyin in the inline composition buffer"; "wN3-k3-b2a.title" = "Choose your desired user data folder path. Will be omitted if invalid."; "wQ9-px-b07.title" = "Apple Dynamic Bopomofo Basic Keyboard Layouts (Dachen & Eten Traditional) must match the Dachen parser in order to be functional."; diff --git a/Source/WindowNIBs/ja.lproj/frmPrefWindow.strings b/Source/WindowNIBs/ja.lproj/frmPrefWindow.strings index caf8894d..3b111111 100644 --- a/Source/WindowNIBs/ja.lproj/frmPrefWindow.strings +++ b/Source/WindowNIBs/ja.lproj/frmPrefWindow.strings @@ -41,6 +41,7 @@ "BSK-bH-Gct.title" = "入力した繁体字を康熙字体と自動変換"; "cf2-se-PDO.title" = "辞書と言語モデル"; "chkAllowBoostingSingleKanjiAsUserPhrase.title" = "即排除/即最優先にできる候補の文字数の最低限は1字とする"; +"chkFetchSuggestionsFromUserOverrideModel.title" = "入力中で臨時記憶モジュールからお薦めの候補を自動的に選ぶ"; "dIN-TZ-67g.title" = "Shift+Space で次のページ、Space で次の候補文字を"; "E1l-m8-xgb.title" = "詳細設定"; "eia-1F-Do0.title" = "入力した繁体字を日文 JIS 新字体と自動変換"; @@ -71,7 +72,7 @@ "TXr-FF-ehw.title" = "繁体中国語"; "ueU-Rz-a1C.title" = "入力候補陳列での (Shift+)Tab キーの輪番切替対象をご指定ください。"; "Uyz-xL-TVN.title" = "出力設定"; -"W24-T4-cg0.title" = "全字庫モード // 入力可能の漢字数倍増 (2022-04-27)"; +"W24-T4-cg0.title" = "全字庫モード // 入力可能の漢字数倍増 (2022-06-15)"; "wFR-zX-M8H.title" = "弁音合併入力(入力緩衝列で代わりに漢語弁音の音読み)"; "wN3-k3-b2a.title" = "欲しがるユーザー辞書保存先をご指定ください。無効の保存先設定は効かぬ。"; "wQ9-px-b07.title" = "Apple 動態注音キーボード (大千と倚天伝統) を使うには、共通語分析器の注音配列を大千と設定すべきである。"; diff --git a/Source/WindowNIBs/zh-Hans.lproj/frmPrefWindow.strings b/Source/WindowNIBs/zh-Hans.lproj/frmPrefWindow.strings index 7fb31c88..bb1acf76 100644 --- a/Source/WindowNIBs/zh-Hans.lproj/frmPrefWindow.strings +++ b/Source/WindowNIBs/zh-Hans.lproj/frmPrefWindow.strings @@ -41,7 +41,7 @@ "BSK-bH-Gct.title" = "自动将繁体中文字转换为康熙正体字"; "cf2-se-PDO.title" = "辞典&語言模型"; "chkAllowBoostingSingleKanjiAsUserPhrase.title" = "将可以就地升权/排除的候选字词的最短词长设为单个汉字"; -"chkAllowBoostingSingleKanjiAsUserPhrase.title" = "Allow boosting / excluding a candidate of single kanji"; +"chkFetchSuggestionsFromUserOverrideModel.title" = "在敲字时自动套用来自半衰记忆模组的建议"; "dIN-TZ-67g.title" = "Shift+Space 换下一页,Space 换选下一个候选字。"; "E1l-m8-xgb.title" = "进阶设定"; "eia-1F-Do0.title" = "自动将繁体中文字转换为日本简化字(JIS 新字体)"; @@ -72,7 +72,7 @@ "TXr-FF-ehw.title" = "繁体中文"; "ueU-Rz-a1C.title" = "指定 (Shift+)Tab 热键在选字窗内的轮替操作对象。"; "Uyz-xL-TVN.title" = "输出设定"; -"W24-T4-cg0.title" = "启用 CNS11643 全字库支援 (2022-04-27)"; +"W24-T4-cg0.title" = "启用 CNS11643 全字库支援 (2022-06-15)"; "wFR-zX-M8H.title" = "拼音并击模式(组字区内看到的是汉语拼音)"; "wN3-k3-b2a.title" = "请在此指定您想指定的使用者语汇档案目录。无效值会被忽略。"; "wQ9-px-b07.title" = "Apple 动态注音键盘布局(大千与倚天)要求普通话/国音分析器的注音排列得配置为大千排列。"; diff --git a/Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings b/Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings index 37a832b6..b98afaf4 100644 --- a/Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings +++ b/Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings @@ -41,6 +41,7 @@ "BSK-bH-Gct.title" = "自動將繁體中文字轉換為康熙正體字"; "cf2-se-PDO.title" = "辭典&語言模型"; "chkAllowBoostingSingleKanjiAsUserPhrase.title" = "將可以就地升權/排除的候選字詞的最短詞長設為單個漢字"; +"chkFetchSuggestionsFromUserOverrideModel.title" = "在敲字時自動套用來自半衰記憶模組的建議"; "dIN-TZ-67g.title" = "Shift+Space 換下一頁,Space 換選下一個候選字"; "E1l-m8-xgb.title" = "進階設定"; "eia-1F-Do0.title" = "自動將繁體中文字轉換為日本簡化字(JIS 新字體)"; @@ -71,7 +72,7 @@ "TXr-FF-ehw.title" = "繁體中文"; "ueU-Rz-a1C.title" = "指定 (Shift+)Tab 熱鍵在選字窗內的輪替操作對象。"; "Uyz-xL-TVN.title" = "輸出設定"; -"W24-T4-cg0.title" = "啟用 CNS11643 全字庫支援 (2022-04-27)"; +"W24-T4-cg0.title" = "啟用 CNS11643 全字庫支援 (2022-06-15)"; "wFR-zX-M8H.title" = "拼音並擊模式(組字區內看到的是漢語拼音)"; "wN3-k3-b2a.title" = "請在此指定您想指定的使用者語彙檔案目錄。無效值會被忽略。"; "wQ9-px-b07.title" = "Apple 動態注音鍵盤佈局(大千與倚天)要求普通話/國音分析器的注音排列得配置為大千排列。"; diff --git a/Update-Info.plist b/Update-Info.plist index c81a51b3..c3fdc05f 100644 --- a/Update-Info.plist +++ b/Update-Info.plist @@ -3,9 +3,9 @@ CFBundleShortVersionString - 1.7.1 + 1.7.2 CFBundleVersion - 1971 + 1972 UpdateInfoEndpoint https://gitee.com/vchewing/vChewing-macOS/raw/main/Update-Info.plist UpdateInfoSite diff --git a/UserPhraseEditor/StringExtension.swift b/UserPhraseEditor/StringExtension.swift index 946852e8..747045a0 100644 --- a/UserPhraseEditor/StringExtension.swift +++ b/UserPhraseEditor/StringExtension.swift @@ -52,14 +52,18 @@ extension String { // Tab to ASCII Space // 統整連續空格為一個 ASCII 空格 strProcessed.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") - strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") // 去除行尾行首空格 - strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & FF to LF, 且去除重複行 + // 去除行尾行首空格 + strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") + strProcessed.regReplace(pattern: #"(\n | \n)"#, replaceWith: "\n") + // CR & FF to LF, 且去除重複行 + strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") if strProcessed.prefix(1) == " " { // 去除檔案開頭空格 strProcessed.removeFirst() } if strProcessed.suffix(1) == " " { // 去除檔案結尾空格 strProcessed.removeLast() } + if cnvHYPYtoBPMF { // Step 2: Convert HanyuPinyin to Bopomofo. // 漢語拼音轉注音,得先從最長的可能的拼音組合開始轉起, diff --git a/vChewing.pkgproj b/vChewing.pkgproj index 06472123..2353d5e8 100644 --- a/vChewing.pkgproj +++ b/vChewing.pkgproj @@ -726,7 +726,7 @@ USE_HFS+_COMPRESSION VERSION - 1.7.1 + 1.7.2 TYPE 0 diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index b77c9f52..2f88d7fb 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -10,9 +10,10 @@ 5B0AF8B527B2C8290096FE54 /* StringExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B0AF8B427B2C8290096FE54 /* StringExtension.swift */; }; 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */; }; 5B242403284B0D6500520FE4 /* ctlCandidateUniversal.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B242402284B0D6500520FE4 /* ctlCandidateUniversal.swift */; }; + 5B2F2BB6286216A500B8557B /* vChewingTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B2F2BB5286216A500B8557B /* vChewingTests.swift */; }; 5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */; }; 5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */; }; - 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */; }; + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePaired.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */; }; 5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */; }; 5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; }; 5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; }; @@ -21,7 +22,7 @@ 5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */; }; 5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */; }; 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */; }; - 5B3A87BC28597CDB0090E163 /* SymbolNode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B3A87BB28597CDB0090E163 /* SymbolNode.swift */; }; + 5B3A87BC28597CDB0090E163 /* LMSymbolNode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B3A87BB28597CDB0090E163 /* LMSymbolNode.swift */; }; 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B407309281672610023DFFF /* lmAssociates.swift */; }; 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B40730A281672610023DFFF /* lmReplacements.swift */; }; 5B54E743283A7D89001ECBDC /* lmCoreNS.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B54E742283A7D89001ECBDC /* lmCoreNS.swift */; }; @@ -128,6 +129,13 @@ remoteGlobalIDString = 5BD05BB727B2A429004C4F1D; remoteInfo = vChewingPhraseEditor; }; + 5B2F2BB7286216A500B8557B /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 6A0D4E9415FC0CFA00ABF4B3 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 6A0D4EA115FC0D2D00ABF4B3; + remoteInfo = vChewing; + }; 6ACA420015FC1DCC00935EF6 /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = 6A0D4E9415FC0CFA00ABF4B3 /* Project object */; @@ -185,9 +193,11 @@ 5B18BA7427C7BD8C0056EB19 /* LICENSE-CHT.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "LICENSE-CHT.txt"; sourceTree = ""; }; 5B242402284B0D6500520FE4 /* ctlCandidateUniversal.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ctlCandidateUniversal.swift; sourceTree = ""; }; 5B2DB17127AF8771006D874E /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; name = Makefile; path = Data/Makefile; sourceTree = ""; }; + 5B2F2BB3286216A500B8557B /* vChewingTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = vChewingTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 5B2F2BB5286216A500B8557B /* vChewingTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = vChewingTests.swift; sourceTree = ""; }; 5B30F11227BA568800484E24 /* vChewingKeyLayout.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = vChewingKeyLayout.bundle; sourceTree = ""; }; 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_States.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; - 5B3A87BB28597CDB0090E163 /* SymbolNode.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SymbolNode.swift; sourceTree = ""; }; + 5B3A87BB28597CDB0090E163 /* LMSymbolNode.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LMSymbolNode.swift; sourceTree = ""; }; 5B407309281672610023DFFF /* lmAssociates.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmAssociates.swift; sourceTree = ""; usesTabs = 0; }; 5B40730A281672610023DFFF /* lmReplacements.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmReplacements.swift; sourceTree = ""; usesTabs = 0; }; 5B54E742283A7D89001ECBDC /* lmCoreNS.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmCoreNS.swift; sourceTree = ""; }; @@ -287,7 +297,7 @@ 6A0D4F1515FC0EB100ABF4B3 /* 1_Compositor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_Compositor.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 0_Megrez.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 2_Grid.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; - 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 7_KeyValuePair.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 7_KeyValuePaired.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 5_LanguageModel.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 4_Node.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; @@ -322,6 +332,13 @@ /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + 5B2F2BB0286216A500B8557B /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; 5BD05BB527B2A429004C4F1D /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -370,6 +387,14 @@ name = MiscRootFiles; sourceTree = ""; }; + 5B2F2BB4286216A500B8557B /* vChewingTests */ = { + isa = PBXGroup; + children = ( + 5B2F2BB5286216A500B8557B /* vChewingTests.swift */, + ); + path = vChewingTests; + sourceTree = ""; + }; 5B407308281672610023DFFF /* SubLMs */ = { isa = PBXGroup; children = ( @@ -464,8 +489,8 @@ 5B407308281672610023DFFF /* SubLMs */, 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */, 5BD0113A28180D6100609769 /* LMInstantiator.swift */, + 5B3A87BB28597CDB0090E163 /* LMSymbolNode.swift */, 5BAEFACF28012565001F42C9 /* mgrLangModel.swift */, - 5B3A87BB28597CDB0090E163 /* SymbolNode.swift */, ); path = LangModelRelated; sourceTree = ""; @@ -702,6 +727,7 @@ 6ACA41E715FC1D9000935EF6 /* Installer */, 6A0D4EC215FC0D3C00ABF4B3 /* Source */, 5BD05BB927B2A429004C4F1D /* UserPhraseEditor */, + 5B2F2BB4286216A500B8557B /* vChewingTests */, 6A0D4EA315FC0D2D00ABF4B3 /* Products */, D47D73C127A7200500255A50 /* Frameworks */, 5BDC5CB127C28E8B00E1CCE2 /* KeyboardExtension */, @@ -715,6 +741,7 @@ 6A0D4EA215FC0D2D00ABF4B3 /* vChewing.app */, 6ACA41CB15FC1D7500935EF6 /* vChewingInstaller.app */, 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */, + 5B2F2BB3286216A500B8557B /* vChewingTests.xctest */, ); name = Products; sourceTree = ""; @@ -769,7 +796,7 @@ 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */, 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */, 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */, - 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */, + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */, ); path = Megrez; sourceTree = ""; @@ -802,6 +829,24 @@ /* End PBXGroup section */ /* Begin PBXNativeTarget section */ + 5B2F2BB2286216A500B8557B /* vChewingTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 5B2F2BBB286216A500B8557B /* Build configuration list for PBXNativeTarget "vChewingTests" */; + buildPhases = ( + 5B2F2BAF286216A500B8557B /* Sources */, + 5B2F2BB0286216A500B8557B /* Frameworks */, + 5B2F2BB1286216A500B8557B /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 5B2F2BB8286216A500B8557B /* PBXTargetDependency */, + ); + name = vChewingTests; + productName = vChewingTests; + productReference = 5B2F2BB3286216A500B8557B /* vChewingTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; 5BD05BB727B2A429004C4F1D /* vChewingPhraseEditor */ = { isa = PBXNativeTarget; buildConfigurationList = 5BD05BC927B2A42A004C4F1D /* Build configuration list for PBXNativeTarget "vChewingPhraseEditor" */; @@ -870,9 +915,13 @@ 6A0D4E9415FC0CFA00ABF4B3 /* Project object */ = { isa = PBXProject; attributes = { - LastSwiftUpdateCheck = 1320; + LastSwiftUpdateCheck = 1340; LastUpgradeCheck = 1400; TargetAttributes = { + 5B2F2BB2286216A500B8557B = { + CreatedOnToolsVersion = 13.4.1; + TestTargetID = 6A0D4EA115FC0D2D00ABF4B3; + }; 5BD05BB727B2A429004C4F1D = { CreatedOnToolsVersion = 13.2; LastSwiftMigration = 1320; @@ -909,11 +958,19 @@ 6A0D4EA115FC0D2D00ABF4B3 /* vChewing */, 6ACA41CA15FC1D7500935EF6 /* vChewingInstaller */, 5BD05BB727B2A429004C4F1D /* vChewingPhraseEditor */, + 5B2F2BB2286216A500B8557B /* vChewingTests */, ); }; /* End PBXProject section */ /* Begin PBXResourcesBuildPhase section */ + 5B2F2BB1286216A500B8557B /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; 5BD05BB627B2A429004C4F1D /* Resources */ = { isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; @@ -1029,6 +1086,14 @@ /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + 5B2F2BAF286216A500B8557B /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 5B2F2BB6286216A500B8557B /* vChewingTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 5BD05BB427B2A429004C4F1D /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -1071,7 +1136,7 @@ D456576E279E4F7B00DF6BC9 /* InputSignal.swift in Sources */, 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */, 5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */, - 5B3A87BC28597CDB0090E163 /* SymbolNode.swift in Sources */, + 5B3A87BC28597CDB0090E163 /* LMSymbolNode.swift in Sources */, 5BA9FD4327FEF3C8002DE248 /* Preferences.swift in Sources */, 5BA9FD4427FEF3C8002DE248 /* SegmentedControlStyleViewController.swift in Sources */, D47F7DCE278BFB57002F9DD7 /* ctlPrefWindow.swift in Sources */, @@ -1084,7 +1149,7 @@ 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */, 5B54E743283A7D89001ECBDC /* lmCoreNS.swift in Sources */, 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */, - 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */, + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePaired.swift in Sources */, 5B62A33627AE795800A19448 /* mgrPrefs.swift in Sources */, 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */, 5BAEFAD028012565001F42C9 /* mgrLangModel.swift in Sources */, @@ -1137,6 +1202,11 @@ target = 5BD05BB727B2A429004C4F1D /* vChewingPhraseEditor */; targetProxy = 5B0AF8B227B2C4E20096FE54 /* PBXContainerItemProxy */; }; + 5B2F2BB8286216A500B8557B /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 6A0D4EA115FC0D2D00ABF4B3 /* vChewing */; + targetProxy = 5B2F2BB7286216A500B8557B /* PBXContainerItemProxy */; + }; 5B707CEA27D9F47A0099EF99 /* PBXTargetDependency */ = { isa = PBXTargetDependency; productRef = 5B707CE927D9F47A0099EF99 /* OpenCC */; @@ -1284,6 +1354,84 @@ /* End PBXVariantGroup section */ /* Begin XCBuildConfiguration section */ + 5B2F2BB9286216A500B8557B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + BUNDLE_LOADER = "$(TEST_HOST)"; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1972; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GENERATE_INFOPLIST_FILE = YES; + MACOSX_DEPLOYMENT_TARGET = 12.3; + MARKETING_VERSION = 1.7.2; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewingTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_EMIT_LOC_STRINGS = NO; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/vChewing.app/Contents/MacOS/vChewing"; + }; + name = Debug; + }; + 5B2F2BBA286216A500B8557B /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + BUNDLE_LOADER = "$(TEST_HOST)"; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1972; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GENERATE_INFOPLIST_FILE = YES; + MACOSX_DEPLOYMENT_TARGET = 12.3; + MARKETING_VERSION = 1.7.2; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewingTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_EMIT_LOC_STRINGS = NO; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + SWIFT_VERSION = 5.0; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/vChewing.app/Contents/MacOS/vChewing"; + }; + name = Release; + }; 5BD05BC727B2A42A004C4F1D /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -1304,7 +1452,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; GCC_C_LANGUAGE_STANDARD = gnu11; @@ -1328,7 +1476,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1361,7 +1509,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; @@ -1381,7 +1529,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1499,7 +1647,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEVELOPMENT_ASSET_PATHS = ""; DEVELOPMENT_TEAM = ""; @@ -1535,7 +1683,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1567,7 +1715,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_ASSET_PATHS = ""; @@ -1598,7 +1746,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; @@ -1625,7 +1773,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -1651,7 +1799,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1678,7 +1826,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1971; + CURRENT_PROJECT_VERSION = 1972; DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_TEAM = ""; @@ -1699,7 +1847,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.7.1; + MARKETING_VERSION = 1.7.2; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; @@ -1713,6 +1861,15 @@ /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ + 5B2F2BBB286216A500B8557B /* Build configuration list for PBXNativeTarget "vChewingTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 5B2F2BB9286216A500B8557B /* Debug */, + 5B2F2BBA286216A500B8557B /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; 5BD05BC927B2A42A004C4F1D /* Build configuration list for PBXNativeTarget "vChewingPhraseEditor" */ = { isa = XCConfigurationList; buildConfigurations = ( diff --git a/vChewing.xcodeproj/xcshareddata/xcschemes/vChewing.xcscheme b/vChewing.xcodeproj/xcshareddata/xcschemes/vChewing.xcscheme index 3ef1fe15..3216c79f 100644 --- a/vChewing.xcodeproj/xcshareddata/xcschemes/vChewing.xcscheme +++ b/vChewing.xcodeproj/xcshareddata/xcschemes/vChewing.xcscheme @@ -28,6 +28,16 @@ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = "YES"> + + + + - - diff --git a/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme b/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme deleted file mode 100644 index f0c8830b..00000000 --- a/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme +++ /dev/null @@ -1,47 +0,0 @@ - - - - - - - - - - - - - - - - - - - diff --git a/vChewingTests/vChewingTests.swift b/vChewingTests/vChewingTests.swift new file mode 100644 index 00000000..a0c5d878 --- /dev/null +++ b/vChewingTests/vChewingTests.swift @@ -0,0 +1,29 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +import XCTest + +class vChewingTests: XCTestCase { + +}