diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 429bb248..621853be 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -38,13 +38,10 @@ public class KeyHandler { /// 檢測是否內容為空(注拼槽與組字器都是空的) var isTypingContentEmpty: Bool { composer.isEmpty && compositor.isEmpty } - /// 規定最大動態爬軌範圍。組字器內超出該範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。 - let kMaxComposingBufferNeedsToWalkSize = Int(max(12, ceil(Double(mgrPrefs.composingBufferSize) / 2))) var composer: Tekkon.Composer = .init() // 注拼槽 var compositor: Megrez.Compositor // 組字器 var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組 var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組 - var walkedAnchors: [Megrez.NodeAnchor] { compositor.walkedAnchors } // 用以記錄爬過的節錨的陣列 /// 委任物件 (ctlInputMethod),以便呼叫其中的函式。 var delegate: KeyHandlerDelegate? @@ -72,7 +69,7 @@ public class KeyHandler { /// 初期化。 public init() { /// 組字器初期化。因為是首次初期化變數,所以這裡不能用 ensureCompositor() 代勞。 - compositor = Megrez.Compositor(lm: currentLM, separator: "-") + compositor = Megrez.Compositor(with: currentLM, separator: "-") /// 注拼槽初期化。 ensureParser() /// 讀取最近的簡繁體模式、且將該屬性內容塞到 inputMode 當中。 @@ -91,7 +88,8 @@ public class KeyHandler { /// /// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。 var actualCandidateCursor: Int { - mgrPrefs.useRearCursorMode ? min(compositor.cursor, compositor.length - 1) : max(compositor.cursor, 1) + compositor.cursor + - ((compositor.cursor == compositor.width || !mgrPrefs.useRearCursorMode) && compositor.cursor > 0 ? 1 : 0) } /// 利用給定的讀音鏈來試圖爬取最接近的組字結果(最大相似度估算)。 @@ -116,23 +114,6 @@ public class KeyHandler { } } - /// 在爬取組字結果之前,先將即將從組字區溢出的內容遞交出去。 - /// - /// 在理想狀況之下,組字區多長都無所謂。但是,Viterbi 演算法使用 O(N^2), - /// 會使得運算壓力隨著節錨數量的增加而增大。於是,有必要限定組字區的長度。 - /// 超過該長度的內容會在爬軌之前先遞交出去,使其不再記入最大相似度估算的 - /// 估算對象範圍。用比較形象且生動卻有點噁心的解釋的話,蒼蠅一邊吃一邊屙。 - var commitOverflownCompositionAndWalk: String { - var textToCommit = "" - if compositor.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty { - let anchor: Megrez.NodeAnchor = walkedAnchors[0] - textToCommit = anchor.node.currentPair.value - compositor.removeHeadReadings(count: anchor.spanLength) - } - walk() - return textToCommit - } - /// 用以組建聯想詞陣列的函式。 /// - Parameter key: 給定的聯想詞的開頭字。 /// - Returns: 抓取到的聯想詞陣列。 @@ -151,106 +132,86 @@ public class KeyHandler { /// - value: 給定之候選字字串。 /// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。 func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) { - let theCandidate: Megrez.KeyValuePaired = .init(key: candidate.0, value: candidate.1) - let adjustedCursor = max(0, min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length)) - // 開始讓半衰模組觀察目前的狀況。 - let selectedNode: Megrez.NodeAnchor = compositor.fixNodeWithCandidate(theCandidate, at: adjustedCursor) - // 不要針對逐字選字模式啟用臨時半衰記憶模型。 - if !mgrPrefs.useSCPCTypingMode { - var addToUserOverrideModel = true - // 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。 - if selectedNode.spanLength != theCandidate.value.count { - IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.") - addToUserOverrideModel = false - } - if addToUserOverrideModel { - // 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。 - if selectedNode.node.scoreForPaired(candidate: theCandidate) <= -12 { - IME.prtDebugIntel("UOM: Score <= -12, dismissing.") - addToUserOverrideModel = false - } - } - if addToUserOverrideModel, mgrPrefs.fetchSuggestionsFromUserOverrideModel { - IME.prtDebugIntel("UOM: Start Observation.") - // 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。 - // 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。 - // 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。 - mgrPrefs.failureFlagForUOMObservation = true - // 令半衰記憶模組觀測給定的三元圖。 - // 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。 - currentUOM.observe( - walkedAnchors: walkedAnchors, cursorIndex: adjustedCursor, candidate: theCandidate.value, - timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() } - ) - // 如果沒有出現崩框的話,那就將這個開關復位。 - mgrPrefs.failureFlagForUOMObservation = false - } - } - + let actualCursor = actualCandidateCursor + let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1) + if !compositor.overrideCandidate(theCandidate, at: actualCursor) { return } // 開始爬軌。 walk() + // 在可行的情況下更新使用者半衰記憶模組。 + var accumulatedCursor = 0 + var currentNode: Megrez.Compositor.Node? + for node in compositor.walkedNodes { + accumulatedCursor += node.spanLength + if accumulatedCursor > actualCursor { + currentNode = node + break + } + } + guard let currentNode = currentNode else { return } + + if currentNode.currentUnigram.score > -12 { + IME.prtDebugIntel("UOM: Start Observation.") + // 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。 + // 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。 + // 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。 + mgrPrefs.failureFlagForUOMObservation = true + // 令半衰記憶模組觀測給定的三元圖。 + // 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。 + currentUOM.observe( + walkedNodes: compositor.walkedNodes, cursorIndex: actualCursor, candidate: theCandidate.value, + timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() } + ) + // 如果沒有出現崩框的話,那就將這個開關復位。 + mgrPrefs.failureFlagForUOMObservation = false + } + /// 若偏好設定內啟用了相關選項,則會在選字之後始終將游標推送至選字後的節錨的前方。 if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing { + // compositor.cursor = accumulatedCursor compositor.jumpCursorBySpan(to: .front) } } - /// 組字器內超出最大動態爬軌範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。 - func markNodesFixedIfNecessary() { - let width = compositor.width - if width <= kMaxComposingBufferNeedsToWalkSize { - return - } - var index = 0 - for anchor in walkedAnchors { - if index >= width - kMaxComposingBufferNeedsToWalkSize { break } - if anchor.node.score < Megrez.Node.kSelectedCandidateScore { - compositor.fixNodeWithCandidate(anchor.node.currentPair, at: index + anchor.spanLength) - } - index += anchor.spanLength - } - } - /// 獲取候選字詞(包含讀音)陣列資料內容。 func getCandidatesArray(fixOrder: Bool = true) -> [(String, String)] { - var arrAnchors: [Megrez.NodeAnchor] = rawAnchorsOfNodes - var arrCandidates: [Megrez.KeyValuePaired] = .init() + /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 + /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。 + var arrCandidates: [Megrez.Compositor.Candidate] = { + switch mgrPrefs.useRearCursorMode { + case false: + return compositor.fetchCandidates(at: actualCandidateCursor, filter: .endAt) + case true: + return compositor.fetchCandidates(at: actualCandidateCursor, filter: .beginAt) + } + }() /// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。 /// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。 /// 這個過程不會傷到子陣列內部的排序。 - if arrAnchors.isEmpty { return .init() } + if arrCandidates.isEmpty { return .init() } - // 讓更長的節錨排序靠前。 - arrAnchors = arrAnchors.stableSort { $0.spanLength > $1.spanLength } - - // 將節錨內的候選字詞資料拓印到輸出陣列內。 - for currentCandidate in arrAnchors.map(\.node.candidates).joined() { - // 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。 - // 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。 - // 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。 - arrCandidates.append(currentCandidate) - } // 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。 if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder { return arrCandidates.map { ($0.key, $0.value) } } - let arrSuggestedUnigrams: [Megrez.Unigram] = fetchSuggestedCandidates().stableSort { $0.score > $1.score } - let arrSuggestedCandidates: [Megrez.KeyValuePaired] = arrSuggestedUnigrams.map(\.keyValue) + let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestedCandidates() + let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map { + Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value) + } arrCandidates = arrSuggestedCandidates.filter { arrCandidates.contains($0) } + arrCandidates arrCandidates = arrCandidates.deduplicate arrCandidates = arrCandidates.stableSort { $0.key.split(separator: "-").count > $1.key.split(separator: "-").count } return arrCandidates.map { ($0.key, $0.value) } } - /// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列。 - func fetchSuggestedCandidates() -> [Megrez.Unigram] { + /// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列,會自動按權重排序。 + func fetchSuggestedCandidates() -> [(String, Megrez.Unigram)] { currentUOM.suggest( - walkedAnchors: walkedAnchors, cursorIndex: compositor.cursor, + walkedNodes: compositor.walkedNodes, cursorIndex: compositor.cursor, timestamp: NSDate().timeIntervalSince1970 - ) + ).stableSort { $0.1.score > $1.1.score } } /// 向半衰引擎詢問可能的選字建議、且套用給組字器內的當前游標位置。 @@ -260,31 +221,19 @@ public class KeyHandler { /// 如果這個開關沒打開的話,直接放棄執行這個函式。 if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return } /// 先就當前上下文讓半衰引擎重新生成三元圖索引鍵。 - let overrideValue = fetchSuggestedCandidates().first?.keyValue.value ?? "" + let overrideValue = fetchSuggestedCandidates().first?.1.value ?? "" /// 再拿著索引鍵去問半衰模組有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。 if !overrideValue.isEmpty { IME.prtDebugIntel( "UOM: Suggestion retrieved, overriding the node score of the selected candidate.") - compositor.overrideNodeScoreForSelectedCandidate( - location: min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length), - value: overrideValue, - overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon) - ) + // TODO: 這裡回頭改成用詞音配對來覆寫的形式。 + compositor.overrideCandidateLiteral(overrideValue, at: actualCandidateCursor, overrideType: .withTopUnigramScore) } else { IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.") } } - /// 就給定的節錨陣列,根據半衰模組的衰減指數,來找出最高權重數值。 - /// - Parameters: - /// - nodes: 給定的節錨陣列。 - /// - epsilon: 半衰模組的衰減指數。 - /// - Returns: 尋獲的最高權重數值。 - func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double { - nodeAnchors.map(\.node.highestUnigramScore).max() ?? 0 + epsilon - } - // MARK: - Extracted methods and functions (Tekkon). /// 獲取與當前注音排列或拼音輸入種類有關的標點索引鍵,以英數下畫線「_」結尾。 @@ -335,15 +284,6 @@ public class KeyHandler { // MARK: - Extracted methods and functions (Megrez). - /// 獲取原始節錨資料陣列。 - var rawAnchorsOfNodes: [Megrez.NodeAnchor] { - /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 - /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。 - mgrPrefs.useRearCursorMode - ? compositor.nodesBeginningAt(location: actualCandidateCursor) - : compositor.nodesEndingAt(location: actualCandidateCursor) - } - /// 將輸入法偏好設定同步至語言模組內。 func syncBaseLMPrefs() { currentLM.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled @@ -354,7 +294,7 @@ public class KeyHandler { /// 令組字器重新初期化,使其與被重新指派過的主語言模組對接。 func ensureCompositor() { // 每個漢字讀音都由一個西文半形減號分隔開。 - compositor = Megrez.Compositor(lm: currentLM, separator: "-") + compositor = Megrez.Compositor(with: currentLM, separator: "-") } /// 生成標點符號索引鍵。 diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift b/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift index c816cbce..43d58638 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift @@ -81,23 +81,19 @@ extension KeyHandler { } // 將該讀音插入至組字器內的軌格當中。 - compositor.insertReading(readingKey) + compositor.insertKey(readingKey) // 讓組字器反爬軌格。 - let textToCommit = commitOverflownCompositionAndWalk + walk() // 看看半衰記憶模組是否會對目前的狀態給出自動選字建議。 fetchAndApplySuggestionsFromUserOverrideModel() - // 將組字器內超出最大動態爬軌範圍的節錨都標記為「已經手動選字過」,減少之後的爬軌運算負擔。 - markNodesFixedIfNecessary() - // 之後就是更新組字區了。先清空注拼槽的內容。 composer.clear() // 再以回呼組字狀態的方式來執行 updateClientComposingBuffer()。 let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) /// 逐字選字模式的處理。 @@ -106,9 +102,9 @@ extension KeyHandler { state: inputting, isTypingVertical: input.isTypingVertical ) - if choosingCandidates.candidates.count == 1 { - let reading: String = choosingCandidates.candidates.first?.0 ?? "" - let text: String = choosingCandidates.candidates.first?.1 ?? "" + if choosingCandidates.candidates.count == 1, let firstCandidate = choosingCandidates.candidates.first { + let reading: String = firstCandidate.0 + let text: String = firstCandidate.1 stateCallback(InputState.Committing(textToCommit: text)) if !mgrPrefs.associatedPhrasesEnabled { diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index 595e08c9..a68d345d 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -163,10 +163,9 @@ extension KeyHandler { stateCallback(InputState.Committing(textToCommit: " ")) stateCallback(InputState.Empty()) } else if currentLM.hasUnigramsFor(key: " ") { - compositor.insertReading(" ") - let textToCommit = commitOverflownCompositionAndWalk + compositor.insertKey(" ") + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) } return true @@ -283,10 +282,9 @@ extension KeyHandler { if input.isOptionHold { if currentLM.hasUnigramsFor(key: "_punctuation_list") { if composer.isEmpty { - compositor.insertReading("_punctuation_list") - let textToCommit: String! = commitOverflownCompositionAndWalk + compositor.insertKey("_punctuation_list") + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) stateCallback(buildCandidate(state: inputting, isTypingVertical: input.isTypingVertical)) } else { // 不要在注音沒敲完整的情況下叫出統合符號選單。 diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index 59e283b6..763aa94c 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -22,21 +22,20 @@ extension KeyHandler { /// 「更新內文組字區 (Update the composing buffer)」是指要求客體軟體將組字緩衝區的內容 /// 換成由此處重新生成的組字字串(NSAttributeString,否則會不顯示)。 var tooltipParameterRef: [String] = ["", ""] - let nodeValuesArray: [String] = walkedAnchors.values + let nodeValuesArray: [String] = compositor.walkedNodes.values var composedStringCursorIndex = 0 var readingCursorIndex = 0 /// IMK 協定的內文組字區的游標長度與游標位置無法正確統計 UTF8 高萬字(比如 emoji)的長度, /// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。 /// 這樣就可以免除不必要的類型轉換。 - for theAnchor in walkedAnchors { - let theNode = theAnchor.node - let strNodeValue = theNode.currentPair.value + for theNode in compositor.walkedNodes { + let strNodeValue = theNode.value let arrSplit: [String] = Array(strNodeValue).map { String($0) } let codepointCount = arrSplit.count /// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。 /// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來 /// 累加、以此為依據,來校正「可見游標位置」。 - let spanningLength: Int = theAnchor.spanLength + let spanningLength: Int = theNode.spanLength if readingCursorIndex + spanningLength <= compositor.cursor { composedStringCursorIndex += strNodeValue.utf16.count readingCursorIndex += spanningLength @@ -60,14 +59,14 @@ extension KeyHandler { /// 所以需要上下文工具提示來顯示游標的相對位置。 /// 這裡先計算一下要用在工具提示當中的顯示參數的內容。 switch compositor.cursor { - case compositor.readings.count...: + case compositor.keys.count...: // 這裡的 compositor.cursor 數值不可能大於 readings.count,因為會被 Megrez 自動糾正。 - tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1] + tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1] case 0: - tooltipParameterRef[1] = compositor.readings[compositor.cursor] + tooltipParameterRef[1] = compositor.keys[compositor.cursor] default: - tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1] - tooltipParameterRef[1] = compositor.readings[compositor.cursor] + tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1] + tooltipParameterRef[1] = compositor.keys[compositor.cursor] } } @@ -125,7 +124,7 @@ extension KeyHandler { cursorIndex: currentState.cursorIndex, candidates: getCandidatesArray(fixOrder: mgrPrefs.useFixecCandidateOrderOnSelection), isTypingVertical: isTypingVertical, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) } @@ -215,7 +214,7 @@ extension KeyHandler { cursorIndex: state.cursorIndex, markerIndex: index, readings: state.readings, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) marking.tooltipForInputting = state.tooltipForInputting stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking) @@ -237,7 +236,7 @@ extension KeyHandler { cursorIndex: state.cursorIndex, markerIndex: index, readings: state.readings, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) marking.tooltipForInputting = state.tooltipForInputting stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking) @@ -280,10 +279,9 @@ extension KeyHandler { return true } - compositor.insertReading(customPunctuation) - let textToCommit = commitOverflownCompositionAndWalk + compositor.insertKey(customPunctuation) + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) // 從這一行之後開始,就是針對逐字選字模式的單獨處理。 @@ -338,7 +336,7 @@ extension KeyHandler { ) -> Bool { guard state is InputState.Inputting else { return false } - var composingBuffer = compositor.readings.joined(separator: "-") + var composingBuffer = compositor.keys.joined(separator: "-") if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin { composingBuffer = Tekkon.restoreToneOneInZhuyinKey(target: composingBuffer) // 恢復陰平標記 composingBuffer = Tekkon.cnvPhonaToHanyuPinyin(target: composingBuffer) // 注音轉拼音 @@ -368,7 +366,7 @@ extension KeyHandler { var composed = "" - for node in walkedAnchors.map(\.node) { + for node in compositor.walkedNodes { var key = node.key if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin { key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記 @@ -379,7 +377,7 @@ extension KeyHandler { key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ") } - let value = node.currentPair.value + let value = node.value // 不要給標點符號等特殊元素加注音 composed += key.contains("_") ? value : "\(value)(\(key))" } @@ -416,7 +414,7 @@ extension KeyHandler { composer.clear() } else if composer.isEmpty { if compositor.cursor > 0 { - compositor.dropReading(direction: .rear) + compositor.dropKey(direction: .rear) walk() } else { IME.prtDebugIntel("9D69908D") @@ -468,7 +466,7 @@ extension KeyHandler { } if composer.isEmpty { - compositor.dropReading(direction: .front) + compositor.dropKey(direction: .front) walk() } else { composer.clear() @@ -640,7 +638,7 @@ extension KeyHandler { composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: nextPosition, - readings: compositor.readings + readings: compositor.keys ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -714,7 +712,7 @@ extension KeyHandler { composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: previousPosition, - readings: compositor.readings + readings: compositor.keys ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -770,7 +768,7 @@ extension KeyHandler { stateCallback: @escaping (InputStateProtocol) -> Void, errorCallback: @escaping () -> Void ) -> Bool { - if composer.isEmpty, compositor.isEmpty || walkedAnchors.isEmpty { return false } + if composer.isEmpty, compositor.isEmpty || compositor.walkedNodes.isEmpty { return false } guard state is InputState.Inputting else { guard state is InputState.Empty else { IME.prtDebugIntel("6044F081") @@ -795,24 +793,27 @@ extension KeyHandler { } var length = 0 - var currentAnchor = Megrez.NodeAnchor() - let cursorIndex = min( - actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length - ) - for anchor in walkedAnchors { - length += anchor.spanLength - if length >= cursorIndex { - currentAnchor = anchor + var currentNode: Megrez.Compositor.Node? + let cursorIndex = actualCandidateCursor + for node in compositor.walkedNodes { + length += node.spanLength + if length > cursorIndex { + currentNode = node break } } - let currentNode = currentAnchor.node - let currentPaired: Megrez.KeyValuePaired = currentNode.currentPair + guard let currentNode = currentNode else { + IME.prtDebugIntel("F58DEA95") + errorCallback() + return true + } + + let currentPaired = (currentNode.key, currentNode.value) var currentIndex = 0 - if currentNode.score < Megrez.Node.kSelectedCandidateScore { - /// 只要是沒有被使用者手動選字過的(節錨下的)節點, + if !currentNode.isOverriden { + /// 如果是沒有被使用者手動選字過的(節錨下的)節點, /// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配 /// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個 /// 注音讀音、卻發現這兩個注音讀音各自的單字權重遠高於由這兩個 @@ -821,14 +822,14 @@ extension KeyHandler { /// (預設情況下是 (Shift+)Tab 來做正 (反) 向切換,但也可以用 /// Shift(+CMD)+Space 或 Alt+↑/↓ 來切換(縱排輸入時則是 Alt+←/→)、 /// 以應對臉書綁架 Tab 鍵的情況。 - if candidates[0].0 == currentPaired.key, candidates[0].1 == currentPaired.value { + if candidates[0] == currentPaired { /// 如果第一個候選字詞是當前節點的候選字詞的值的話, /// 那就切到下一個(或上一個,也就是最後一個)候選字詞。 currentIndex = reverseModifier ? candidates.count - 1 : 1 } } else { for candidate in candidates { - if candidate.0 == currentPaired.key, candidate.1 == currentPaired.value { + if candidate == currentPaired { if reverseModifier { if currentIndex == 0 { currentIndex = candidates.count - 1 diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 38a9e019..e0c3c681 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -175,14 +175,8 @@ extension vChewing { /// - Parameter key: 給定的讀音字串。 /// - Returns: 對應的經過處理的單元圖陣列。 public func unigramsFor(key: String) -> [Megrez.Unigram] { - if key == " " { - /// 給空格鍵指定輸出值。 - let spaceUnigram = Megrez.Unigram( - keyValue: Megrez.KeyValuePaired(key: " ", value: " "), - score: 0 - ) - return [spaceUnigram] - } + /// 給空格鍵指定輸出值。 + if key == " " { return [.init(value: " ")] } /// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。 var rawAllUnigrams: [Megrez.Unigram] = [] @@ -209,11 +203,11 @@ extension vChewing { rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: key)) // 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。 - var filteredPairs: Set = [] + var filteredPairs: Set = [] // 載入要過濾的 KeyValuePair 清單。 for unigram in lmFiltered.unigramsFor(key: key) { - filteredPairs.insert(unigram.keyValue) + filteredPairs.insert(unigram.value) } return filterAndTransform( @@ -243,9 +237,6 @@ extension vChewing { lmAssociates.hasValuesFor(pair: pair) } - /// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。 - public func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } - // MARK: - 核心函式(對內) /// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。 @@ -255,20 +246,20 @@ extension vChewing { /// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列。 func filterAndTransform( unigrams: [Megrez.Unigram], - filter filteredPairs: Set + filter filteredPairs: Set ) -> [Megrez.Unigram] { var results: [Megrez.Unigram] = [] - var insertedPairs: Set = [] + var insertedPairs: Set = [] for unigram in unigrams { - var pair: Megrez.KeyValuePaired = unigram.keyValue - if filteredPairs.contains(pair) { continue } + var theValue: String = unigram.value + if filteredPairs.contains(theValue) { continue } if isPhraseReplacementEnabled { - let replacement = lmReplacements.valuesFor(key: pair.value) - if !replacement.isEmpty { pair.value = replacement } + let replacement = lmReplacements.valuesFor(key: theValue) + if !replacement.isEmpty { theValue = replacement } } - if insertedPairs.contains(pair) { continue } - results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) - insertedPairs.insert(pair) + if insertedPairs.contains(theValue) { continue } + results.append(Megrez.Unigram(value: theValue, score: unigram.score)) + insertedPairs.insert(theValue) } return results } diff --git a/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift b/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift index e6987ee6..3798d91d 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift @@ -31,9 +31,9 @@ extension vChewing.LMInstantiator { var date3 = ChineseConverter.convertArabicNumeralsToChinese(target: date2) date3 = date3.replacingOccurrences(of: "年〇", with: "年") date3 = date3.replacingOccurrences(of: "月〇", with: "月") - results.append(.init(keyValue: .init(key: key, value: date1), score: -94)) - results.append(.init(keyValue: .init(key: key, value: date2), score: -95)) - results.append(.init(keyValue: .init(key: key, value: date3), score: -96)) + results.append(.init(value: date1, score: -94)) + results.append(.init(value: date2, score: -95)) + results.append(.init(value: date3, score: -96)) if let currentDateShortened = currentDateShortened, delta.year != 0 { var dateAlt1: String = formatterDate1.string(from: currentDateShortened) dateAlt1.regReplace(pattern: #"^0+"#) @@ -42,9 +42,9 @@ extension vChewing.LMInstantiator { var dateAlt3 = ChineseConverter.convertArabicNumeralsToChinese(target: dateAlt2) dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年") dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月") - results.append(.init(keyValue: .init(key: key, value: dateAlt1), score: -97)) - results.append(.init(keyValue: .init(key: key, value: dateAlt2), score: -98)) - results.append(.init(keyValue: .init(key: key, value: dateAlt3), score: -99)) + results.append(.init(value: dateAlt1, score: -97)) + results.append(.init(value: dateAlt2, score: -98)) + results.append(.init(value: dateAlt3, score: -99)) } case "ㄕˊ-ㄐㄧㄢ": let formatterTime1 = DateFormatter() @@ -56,9 +56,9 @@ extension vChewing.LMInstantiator { let time1 = formatterTime1.string(from: currentDate) let time2 = formatterTime2.string(from: currentDate) let time3 = formatterTime3.string(from: currentDate) - results.append(.init(keyValue: .init(key: key, value: time1), score: -97)) - results.append(.init(keyValue: .init(key: key, value: time2), score: -98)) - results.append(.init(keyValue: .init(key: key, value: time3), score: -99)) + results.append(.init(value: time1, score: -97)) + results.append(.init(value: time2, score: -98)) + results.append(.init(value: time3, score: -99)) case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ": let formatterWeek1 = DateFormatter() let formatterWeek2 = DateFormatter() @@ -68,8 +68,8 @@ extension vChewing.LMInstantiator { formatterWeek2.locale = theLocale let week1 = formatterWeek1.string(from: currentDate) let week2 = formatterWeek2.string(from: currentDate) - results.append(.init(keyValue: .init(key: key, value: week1), score: -98)) - results.append(.init(keyValue: .init(key: key, value: week2), score: -99)) + results.append(.init(value: week1, score: -98)) + results.append(.init(value: week2, score: -99)) default: return .init() } return results diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 4556c78b..5879eb95 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -115,18 +115,6 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成雙元圖陣列。 - /// - /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 - /// - parameters: - /// - precedingKey: 前述讀音索引鍵。 - /// - key: 當前讀音索引鍵。 - public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { - // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 - // 其實只要一句「[Megrez.Bigram]()」就夠了。 - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵。 @@ -136,7 +124,6 @@ extension vChewing { for netaRange in arrRangeRecords { let neta = strData[netaRange].split(separator: " ") let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1]) - let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 3, !shouldForceDefaultScore, !neta[2].contains("#") { theScore = .init(String(neta[2])) ?? defaultScore @@ -144,7 +131,7 @@ extension vChewing { if theScore > 0 { theScore *= -1 // 應對可能忘記寫負號的情形 } - grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) + grams.append(Megrez.Unigram(value: theValue, score: theScore)) } } return grams diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift index c513ef21..739df459 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift @@ -108,18 +108,6 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成雙元圖陣列。 - /// - /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 - /// - parameters: - /// - precedingKey: 前述讀音索引鍵。 - /// - key: 當前讀音索引鍵。 - public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { - // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 - // 其實只要一句「[Megrez.Bigram]()」就夠了。 - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵。 @@ -130,7 +118,6 @@ extension vChewing { let strNetaSet = String(decoding: netaSet, as: UTF8.self) let neta = Array(strNetaSet.split(separator: " ").reversed()) let theValue: String = .init(neta[0]) - let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 2, !shouldForceDefaultScore { theScore = .init(String(neta[1])) ?? defaultScore @@ -138,7 +125,7 @@ extension vChewing { if theScore > 0 { theScore *= -1 // 應對可能忘記寫負號的情形 } - grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) + grams.append(Megrez.Unigram(value: theValue, score: theScore)) } } return grams diff --git a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift index d5efd7bb..eb731d14 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift @@ -27,13 +27,13 @@ extension vChewing { } public func observe( - walkedAnchors: [Megrez.NodeAnchor], + walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, candidate: String, timestamp: Double, saveCallback: @escaping () -> Void ) { - let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) + let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) guard !key.isEmpty else { return } guard mutLRUMap[key] != nil else { @@ -57,7 +57,7 @@ extension vChewing { // 降低磁碟寫入次數。唯有失憶的情況下才會更新觀察且記憶。 if var theNeta = mutLRUMap[key] { _ = suggest( - walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, timestamp: timestamp, + walkedNodes: walkedNodes, cursorIndex: cursorIndex, timestamp: timestamp, decayCallback: { theNeta.observation.update(candidate: candidate, timestamp: timestamp) self.mutLRUList.insert(theNeta, at: 0) @@ -70,17 +70,17 @@ extension vChewing { } public func suggest( - walkedAnchors: [Megrez.NodeAnchor], + walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, timestamp: Double, decayCallback: @escaping () -> Void = {} - ) -> [Megrez.Unigram] { - let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) + ) -> [(String, Megrez.Unigram)] { + let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) guard !key.isEmpty else { IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.") return .init() } - let currentReadingKey = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, readingOnly: true) + let currentReadingKey = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex, readingOnly: true) guard let koPair = mutLRUMap[key] else { IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).") return .init() @@ -88,7 +88,7 @@ extension vChewing { let observation = koPair.observation - var arrResults = [Megrez.Unigram]() + var arrResults = [(String, Megrez.Unigram)]() var currentHighScore = 0.0 for overrideNeta in Array(observation.overrides) { let override: Override = overrideNeta.value @@ -111,10 +111,8 @@ extension vChewing { ) if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() } - let newUnigram = Megrez.Unigram( - keyValue: .init(key: currentReadingKey, value: overrideNeta.key), score: overrideScore - ) - arrResults.insert(newUnigram, at: 0) + let newUnigram = Megrez.Unigram(value: overrideNeta.key, score: overrideScore) + arrResults.insert((currentReadingKey, newUnigram), at: 0) currentHighScore = overrideScore } if arrResults.isEmpty { @@ -137,12 +135,12 @@ extension vChewing { } func convertKeyFrom( - walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false + walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, readingOnly: Bool = false ) -> String { let whiteList = "你他妳她祢衪它牠再在" - var arrNodes: [Megrez.NodeAnchor] = [] + var arrNodes: [Megrez.Compositor.Node] = [] var intLength = 0 - for theNodeAnchor in walkedAnchors { + for theNodeAnchor in walkedNodes { arrNodes.append(theNodeAnchor) intLength += theNodeAnchor.spanLength if intLength >= cursorIndex { @@ -154,7 +152,7 @@ extension vChewing { arrNodes = Array(arrNodes.reversed()) - let kvCurrent = arrNodes[0].node.currentPair + let kvCurrent = arrNodes[0].currentPair guard !kvCurrent.key.contains("_") else { return "" } @@ -183,7 +181,7 @@ extension vChewing { !kvPrevious.key.contains("_"), kvPrevious.key.split(separator: "-").count == kvPrevious.value.count { - kvPrevious = arrNodes[1].node.currentPair + kvPrevious = arrNodes[1].currentPair readingStack = kvPrevious.key + readingStack } @@ -191,7 +189,7 @@ extension vChewing { !kvAnterior.key.contains("_"), kvAnterior.key.split(separator: "-").count == kvAnterior.value.count { - kvAnterior = arrNodes[2].node.currentPair + kvAnterior = arrNodes[2].currentPair readingStack = kvAnterior.key + readingStack } diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 6815cf7d..3fc48028 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -184,7 +184,7 @@ enum mgrLangModel { (mode == InputMode.imeModeCHT) ? gLangModelCHT.unigramsFor(key: unigramKey) : gLangModelCHS.unigramsFor(key: unigramKey) for unigram in unigrams { - if unigram.keyValue.value == userPhrase { + if unigram.value == userPhrase { return true } }