diff --git a/AUTHORS b/AUTHORS index fb456c5a..8a6aded1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -32,8 +32,8 @@ $ Contributors and volunteers of the upstream repo, having no responsibility in Although there is no Lukhnos's codes left in the current repository, we still credit him for his previous work: - Lukhnos Liu: - - Developer of Gramambular language engine (removed since vChewing 1.5.4). - - Shiki Suen's Megrez engine is basically a Swift-rewritten version of Gramambular with further development. + - Developer of Gramambular 2 language engine (removed since vChewing 1.5.4). + - Shiki Suen's Megrez engine (MIT License) is basically a Swift-rewritten version of Gramambular 2 with further development. - Developer of Mandarin syllable composer (removed since vChewing 1.5.7). - Shiki Suen's Tekkon engine is made from scratch and has no relationship to Mandarin syllable composer. diff --git a/Source/Modules/ControllerModules/InputState.swift b/Source/Modules/ControllerModules/InputState.swift index 943c3db1..d75ec67a 100644 --- a/Source/Modules/ControllerModules/InputState.swift +++ b/Source/Modules/ControllerModules/InputState.swift @@ -277,7 +277,7 @@ public enum InputState { return arrOutput.joined(separator: " ") } - private var deleteTargetExists = false + private var markedTargetExists = false var tooltip: String { if composingBuffer.count != readings.count { ctlInputMethod.tooltipController.setColor(state: .denialOverflow) @@ -318,11 +318,12 @@ public enum InputState { userPhrase: text, mode: IME.currentInputMode, key: joined ) if exist { - deleteTargetExists = exist + markedTargetExists = exist ctlInputMethod.tooltipController.setColor(state: .prompt) return String( format: NSLocalizedString( - "\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude.", comment: "" + "\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude.", + comment: "" ) + "\n// " + literalReadingThread, text ) } @@ -397,12 +398,10 @@ public enum InputState { return state } - var validToWrite: Bool { + var validToFilter: Bool { /// 與小麥注音不同,威注音會自動解消「游標插斷字符」的異常狀態,所以允許在字音長度不相符的情況下加詞。 /// 這裡的 deleteTargetExists 是防止使用者排除「詞庫內尚未存在的詞」。 - (ctlInputMethod.areWeDeleting && !deleteTargetExists) - ? false - : allowedMarkRange.contains(literalMarkedRange.count) + markedTargetExists ? allowedMarkRange.contains(literalMarkedRange.count) : false } var chkIfUserPhraseExists: Bool { @@ -418,7 +417,8 @@ public enum InputState { let text = composingBuffer.utf16SubString(with: markedRange) let selectedReadings = readings[literalMarkedRange] let joined = selectedReadings.joined(separator: "-") - return "\(text) \(joined)" + let nerfedScore = ctlInputMethod.areWeNerfing && markedTargetExists ? " -114.514" : "" + return "\(text) \(joined)\(nerfedScore)" } var userPhraseConverted: String { @@ -426,8 +426,9 @@ public enum InputState { ChineseConverter.crossConvert(composingBuffer.utf16SubString(with: markedRange)) ?? "" let selectedReadings = readings[literalMarkedRange] let joined = selectedReadings.joined(separator: "-") + let nerfedScore = ctlInputMethod.areWeNerfing && markedTargetExists ? " -114.514" : "" let convertedMark = "#𝙃𝙪𝙢𝙖𝙣𝘾𝙝𝙚𝙘𝙠𝙍𝙚𝙦𝙪𝙞𝙧𝙚𝙙" - return "\(text) \(joined)\t\(convertedMark)" + return "\(text) \(joined)\(nerfedScore)\t\(convertedMark)" } } diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 26ea1ccc..85f98a89 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -23,7 +23,7 @@ protocol KeyHandlerDelegate { _: KeyHandler, didSelectCandidateAt index: Int, ctlCandidate controller: ctlCandidateProtocol ) - func keyHandler(_ keyHandler: KeyHandler, didRequestWriteUserPhraseWith state: InputStateProtocol) + func keyHandler(_ keyHandler: KeyHandler, didRequestWriteUserPhraseWith state: InputStateProtocol, addToFilter: Bool) -> Bool } @@ -38,13 +38,10 @@ public class KeyHandler { /// 檢測是否內容為空(注拼槽與組字器都是空的) var isTypingContentEmpty: Bool { composer.isEmpty && compositor.isEmpty } - /// 規定最大動態爬軌範圍。組字器內超出該範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。 - let kMaxComposingBufferNeedsToWalkSize = Int(max(12, ceil(Double(mgrPrefs.composingBufferSize) / 2))) var composer: Tekkon.Composer = .init() // 注拼槽 var compositor: Megrez.Compositor // 組字器 var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組 var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組 - var walkedAnchors: [Megrez.NodeAnchor] { compositor.walkedAnchors } // 用以記錄爬過的節錨的陣列 /// 委任物件 (ctlInputMethod),以便呼叫其中的函式。 var delegate: KeyHandlerDelegate? @@ -72,7 +69,7 @@ public class KeyHandler { /// 初期化。 public init() { /// 組字器初期化。因為是首次初期化變數,所以這裡不能用 ensureCompositor() 代勞。 - compositor = Megrez.Compositor(lm: currentLM, separator: "-") + compositor = Megrez.Compositor(with: currentLM, separator: "-") /// 注拼槽初期化。 ensureParser() /// 讀取最近的簡繁體模式、且將該屬性內容塞到 inputMode 當中。 @@ -91,7 +88,8 @@ public class KeyHandler { /// /// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。 var actualCandidateCursor: Int { - mgrPrefs.useRearCursorMode ? min(compositor.cursor, compositor.length - 1) : max(compositor.cursor, 1) + compositor.cursor + - ((compositor.cursor == compositor.width || !mgrPrefs.useRearCursorMode) && compositor.cursor > 0 ? 1 : 0) } /// 利用給定的讀音鏈來試圖爬取最接近的組字結果(最大相似度估算)。 @@ -116,23 +114,6 @@ public class KeyHandler { } } - /// 在爬取組字結果之前,先將即將從組字區溢出的內容遞交出去。 - /// - /// 在理想狀況之下,組字區多長都無所謂。但是,Viterbi 演算法使用 O(N^2), - /// 會使得運算壓力隨著節錨數量的增加而增大。於是,有必要限定組字區的長度。 - /// 超過該長度的內容會在爬軌之前先遞交出去,使其不再記入最大相似度估算的 - /// 估算對象範圍。用比較形象且生動卻有點噁心的解釋的話,蒼蠅一邊吃一邊屙。 - var commitOverflownCompositionAndWalk: String { - var textToCommit = "" - if compositor.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty { - let anchor: Megrez.NodeAnchor = walkedAnchors[0] - textToCommit = anchor.node.currentPair.value - compositor.removeHeadReadings(count: anchor.spanLength) - } - walk() - return textToCommit - } - /// 用以組建聯想詞陣列的函式。 /// - Parameter key: 給定的聯想詞的開頭字。 /// - Returns: 抓取到的聯想詞陣列。 @@ -145,144 +126,115 @@ public class KeyHandler { return arrResult } - /// 在組字器內,以給定之候選字字串、來試圖在給定游標位置所在之處指定選字處理過程。 + /// 在組字器內,以給定之候選字(詞音配對)、來試圖在給定游標位置所在之處指定選字處理過程。 /// 然後再將對應的節錨內的節點標記為「已經手動選字過」。 /// - Parameters: - /// - value: 給定之候選字字串。 + /// - value: 給定之候選字(詞音配對)。 /// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。 func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) { - let theCandidate: Megrez.KeyValuePaired = .init(key: candidate.0, value: candidate.1) - let adjustedCursor = max(0, min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length)) - // 開始讓半衰模組觀察目前的狀況。 - let selectedNode: Megrez.NodeAnchor = compositor.fixNodeWithCandidate(theCandidate, at: adjustedCursor) - // 不要針對逐字選字模式啟用臨時半衰記憶模型。 - if !mgrPrefs.useSCPCTypingMode { - var addToUserOverrideModel = true - // 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。 - if selectedNode.spanLength != theCandidate.value.count { - IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.") - addToUserOverrideModel = false - } - if addToUserOverrideModel { - // 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。 - if selectedNode.node.scoreForPaired(candidate: theCandidate) <= -12 { - IME.prtDebugIntel("UOM: Score <= -12, dismissing.") - addToUserOverrideModel = false - } - } - if addToUserOverrideModel, mgrPrefs.fetchSuggestionsFromUserOverrideModel { - IME.prtDebugIntel("UOM: Start Observation.") - // 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。 - // 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。 - // 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。 - mgrPrefs.failureFlagForUOMObservation = true - // 令半衰記憶模組觀測給定的三元圖。 - // 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。 - currentUOM.observe( - walkedAnchors: walkedAnchors, cursorIndex: adjustedCursor, candidate: theCandidate.value, - timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() } - ) - // 如果沒有出現崩框的話,那就將這個開關復位。 - mgrPrefs.failureFlagForUOMObservation = false - } - } - + let actualCursor = actualCandidateCursor + let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1) + if !compositor.overrideCandidate(theCandidate, at: actualCursor, overrideType: .withHighScore) { return } + let previousWalk = compositor.walkedNodes // 開始爬軌。 walk() + let currentWalk = compositor.walkedNodes + + // 在可行的情況下更新使用者半衰記憶模組。 + var accumulatedCursor = 0 + let currentNode = currentWalk.findNode(at: actualCandidateCursor, target: &accumulatedCursor) + guard let currentNode = currentNode else { return } + + if currentNode.currentUnigram.score > -12, mgrPrefs.fetchSuggestionsFromUserOverrideModel { + IME.prtDebugIntel("UOM: Start Observation.") + // 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。 + // 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。 + // 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。 + mgrPrefs.failureFlagForUOMObservation = true + // 令半衰記憶模組觀測給定的三元圖。 + // 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。 + currentUOM.performObservation( + walkedBefore: previousWalk, walkedAfter: currentWalk, cursor: actualCandidateCursor, + timestamp: Date().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() } + ) + // 如果沒有出現崩框的話,那就將這個開關復位。 + mgrPrefs.failureFlagForUOMObservation = false + } /// 若偏好設定內啟用了相關選項,則會在選字之後始終將游標推送至選字後的節錨的前方。 if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing { + // compositor.cursor = accumulatedCursor compositor.jumpCursorBySpan(to: .front) } } - /// 組字器內超出最大動態爬軌範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。 - func markNodesFixedIfNecessary() { - let width = compositor.width - if width <= kMaxComposingBufferNeedsToWalkSize { - return - } - var index = 0 - for anchor in walkedAnchors { - if index >= width - kMaxComposingBufferNeedsToWalkSize { break } - if anchor.node.score < Megrez.Node.kSelectedCandidateScore { - compositor.fixNodeWithCandidate(anchor.node.currentPair, at: index + anchor.spanLength) - } - index += anchor.spanLength - } - } - /// 獲取候選字詞(包含讀音)陣列資料內容。 func getCandidatesArray(fixOrder: Bool = true) -> [(String, String)] { - var arrAnchors: [Megrez.NodeAnchor] = rawAnchorsOfNodes - var arrCandidates: [Megrez.KeyValuePaired] = .init() + /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 + /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。 + var arrCandidates: [Megrez.Compositor.Candidate] = { + switch mgrPrefs.useRearCursorMode { + case false: + return compositor.fetchCandidates(at: actualCandidateCursor, filter: .endAt) + case true: + return compositor.fetchCandidates(at: actualCandidateCursor, filter: .beginAt) + } + }() /// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。 /// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。 /// 這個過程不會傷到子陣列內部的排序。 - if arrAnchors.isEmpty { return .init() } + if arrCandidates.isEmpty { return .init() } - // 讓更長的節錨排序靠前。 - arrAnchors = arrAnchors.stableSort { $0.spanLength > $1.spanLength } - - // 將節錨內的候選字詞資料拓印到輸出陣列內。 - for currentCandidate in arrAnchors.map(\.node.candidates).joined() { - // 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。 - // 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。 - // 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。 - arrCandidates.append(currentCandidate) - } // 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。 if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder { return arrCandidates.map { ($0.key, $0.value) } } - let arrSuggestedUnigrams: [Megrez.Unigram] = fetchSuggestedCandidates().stableSort { $0.score > $1.score } - let arrSuggestedCandidates: [Megrez.KeyValuePaired] = arrSuggestedUnigrams.map(\.keyValue) + let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestionsFromUOM(apply: false) + let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map { + Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value) + } arrCandidates = arrSuggestedCandidates.filter { arrCandidates.contains($0) } + arrCandidates arrCandidates = arrCandidates.deduplicate arrCandidates = arrCandidates.stableSort { $0.key.split(separator: "-").count > $1.key.split(separator: "-").count } return arrCandidates.map { ($0.key, $0.value) } } - /// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列。 - func fetchSuggestedCandidates() -> [Megrez.Unigram] { - currentUOM.suggest( - walkedAnchors: walkedAnchors, cursorIndex: compositor.cursor, - timestamp: NSDate().timeIntervalSince1970 - ) - } - /// 向半衰引擎詢問可能的選字建議、且套用給組字器內的當前游標位置。 - func fetchAndApplySuggestionsFromUserOverrideModel() { + @discardableResult func fetchSuggestionsFromUOM(apply: Bool) -> [(String, Megrez.Unigram)] { + var arrResult = [(String, Megrez.Unigram)]() /// 如果逐字選字模式有啟用的話,直接放棄執行這個函式。 - if mgrPrefs.useSCPCTypingMode { return } + if mgrPrefs.useSCPCTypingMode { return arrResult } /// 如果這個開關沒打開的話,直接放棄執行這個函式。 - if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return } - /// 先就當前上下文讓半衰引擎重新生成三元圖索引鍵。 - let overrideValue = fetchSuggestedCandidates().first?.keyValue.value ?? "" - - /// 再拿著索引鍵去問半衰模組有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。 - if !overrideValue.isEmpty { - IME.prtDebugIntel( - "UOM: Suggestion retrieved, overriding the node score of the selected candidate.") - compositor.overrideNodeScoreForSelectedCandidate( - location: min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length), - value: overrideValue, - overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon) - ) - } else { - IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.") + if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return arrResult } + /// 獲取來自半衰記憶模組的建議結果 + let suggestion = currentUOM.fetchSuggestion( + currentWalk: compositor.walkedNodes, cursor: actualCandidateCursor, timestamp: Date().timeIntervalSince1970 + ) + arrResult.append(contentsOf: suggestion.candidates) + if apply { + /// 再看有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。 + if !suggestion.isEmpty, let newestSuggestedCandidate = suggestion.candidates.last { + let overrideBehavior: Megrez.Compositor.Node.OverrideType = + suggestion.forceHighScoreOverride ? .withHighScore : .withTopUnigramScore + let suggestedPair: Megrez.Compositor.Candidate = .init( + key: newestSuggestedCandidate.0, value: newestSuggestedCandidate.1.value + ) + IME.prtDebugIntel( + "UOM: Suggestion retrieved, overriding the node score of the selected candidate: \(suggestedPair.toNGramKey)") + if !compositor.overrideCandidate(suggestedPair, at: actualCandidateCursor, overrideType: overrideBehavior) { + compositor.overrideCandidateLiteral( + newestSuggestedCandidate.1.value, at: actualCandidateCursor, overrideType: overrideBehavior + ) + } + walk() + } else { + IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.") + } } - } - - /// 就給定的節錨陣列,根據半衰模組的衰減指數,來找出最高權重數值。 - /// - Parameters: - /// - nodes: 給定的節錨陣列。 - /// - epsilon: 半衰模組的衰減指數。 - /// - Returns: 尋獲的最高權重數值。 - func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double { - nodeAnchors.map(\.node.highestUnigramScore).max() ?? 0 + epsilon + arrResult = arrResult.stableSort { $0.1.score > $1.1.score } + return arrResult } // MARK: - Extracted methods and functions (Tekkon). @@ -335,15 +287,6 @@ public class KeyHandler { // MARK: - Extracted methods and functions (Megrez). - /// 獲取原始節錨資料陣列。 - var rawAnchorsOfNodes: [Megrez.NodeAnchor] { - /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 - /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。 - mgrPrefs.useRearCursorMode - ? compositor.nodesBeginningAt(location: actualCandidateCursor) - : compositor.nodesEndingAt(location: actualCandidateCursor) - } - /// 將輸入法偏好設定同步至語言模組內。 func syncBaseLMPrefs() { currentLM.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled @@ -354,7 +297,7 @@ public class KeyHandler { /// 令組字器重新初期化,使其與被重新指派過的主語言模組對接。 func ensureCompositor() { // 每個漢字讀音都由一個西文半形減號分隔開。 - compositor = Megrez.Compositor(lm: currentLM, separator: "-") + compositor = Megrez.Compositor(with: currentLM, separator: "-") } /// 生成標點符號索引鍵。 diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift b/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift index e68437b9..f369a266 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleCandidate.swift @@ -51,7 +51,6 @@ extension KeyHandler { // 就將當前的組字緩衝區析構處理、強制重設輸入狀態。 // 否則,一個本不該出現的真空組字緩衝區會使前後方向鍵與 BackSpace 鍵失靈。 // 所以這裡需要對 compositor.isEmpty 做判定。 - clear() stateCallback(InputState.EmptyIgnoringPreviousState()) stateCallback(InputState.Empty()) } else { @@ -67,7 +66,6 @@ extension KeyHandler { if input.isEnter { if state is InputState.AssociatedPhrases, !mgrPrefs.alsoConfirmAssociatedCandidatesByEnter { - clear() stateCallback(InputState.EmptyIgnoringPreviousState()) stateCallback(InputState.Empty()) return true @@ -309,13 +307,15 @@ extension KeyHandler { let punctuationNamePrefix: String = generatePunctuationNamePrefix(withKeyCondition: input) let parser = currentMandarinParser let arrCustomPunctuations: [String] = [ - punctuationNamePrefix, parser, String(format: "%c", CChar(charCode)), + punctuationNamePrefix, parser, String(format: "%c", charCode.isPrintableASCII ? CChar(charCode) : inputText), ] let customPunctuation: String = arrCustomPunctuations.joined(separator: "") /// 看看這個輸入是否是不需要修飾鍵的那種標點鍵輸入。 - let arrPunctuations: [String] = [punctuationNamePrefix, String(format: "%c", CChar(charCode))] + let arrPunctuations: [String] = [ + punctuationNamePrefix, String(format: "%c", charCode.isPrintableASCII ? CChar(charCode) : inputText), + ] let punctuation: String = arrPunctuations.joined(separator: "") var shouldAutoSelectCandidate: Bool = @@ -323,7 +323,9 @@ extension KeyHandler { || currentLM.hasUnigramsFor(key: punctuation) if !shouldAutoSelectCandidate, input.isUpperCaseASCIILetterKey { - let letter: String! = String(format: "%@%c", "_letter_", CChar(charCode)) + let letter: String! = String( + format: "%@%c", "_letter_", charCode.isPrintableASCII ? CChar(charCode) : inputText + ) if currentLM.hasUnigramsFor(key: letter) { shouldAutoSelectCandidate = true } } @@ -335,7 +337,6 @@ extension KeyHandler { didSelectCandidateAt: candidateIndex, ctlCandidate: ctlCandidateCurrent ) - clear() stateCallback(InputState.EmptyIgnoringPreviousState()) stateCallback(InputState.Empty()) return handle( diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift b/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift index b9e9ec0e..eb57165b 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleComposition.swift @@ -81,23 +81,19 @@ extension KeyHandler { } // 將該讀音插入至組字器內的軌格當中。 - compositor.insertReading(readingKey) + compositor.insertKey(readingKey) // 讓組字器反爬軌格。 - let textToCommit = commitOverflownCompositionAndWalk + walk() // 看看半衰記憶模組是否會對目前的狀態給出自動選字建議。 - fetchAndApplySuggestionsFromUserOverrideModel() - - // 將組字器內超出最大動態爬軌範圍的節錨都標記為「已經手動選字過」,減少之後的爬軌運算負擔。 - markNodesFixedIfNecessary() + fetchSuggestionsFromUOM(apply: true) // 之後就是更新組字區了。先清空注拼槽的內容。 composer.clear() // 再以回呼組字狀態的方式來執行 updateClientComposingBuffer()。 let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) /// 逐字選字模式的處理。 @@ -106,10 +102,9 @@ extension KeyHandler { state: inputting, isTypingVertical: input.isTypingVertical ) - if choosingCandidates.candidates.count == 1 { - clear() - let reading: String = choosingCandidates.candidates.first?.0 ?? "" - let text: String = choosingCandidates.candidates.first?.1 ?? "" + if choosingCandidates.candidates.count == 1, let firstCandidate = choosingCandidates.candidates.first { + let reading: String = firstCandidate.0 + let text: String = firstCandidate.1 stateCallback(InputState.Committing(textToCommit: text)) if !mgrPrefs.associatedPhrasesEnabled { diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index be8a2b1d..d362d9c9 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -69,7 +69,6 @@ extension KeyHandler { // 略過對 BackSpace 的處理。 } else if input.isCapsLockOn || input.isASCIIModeInput { // 但願能夠處理這種情況下所有可能的按鍵組合。 - clear() stateCallback(InputState.Empty()) // 摁 Shift 的話,無須額外處理,因為直接就會敲出大寫字母。 @@ -99,7 +98,6 @@ extension KeyHandler { if !(state is InputState.ChoosingCandidate || state is InputState.AssociatedPhrases || state is InputState.SymbolTable) { - clear() stateCallback(InputState.Empty()) stateCallback(InputState.Committing(textToCommit: inputText.lowercased())) stateCallback(InputState.Empty()) @@ -162,14 +160,12 @@ extension KeyHandler { if !composingBuffer.isEmpty { stateCallback(InputState.Committing(textToCommit: composingBuffer)) } - clear() stateCallback(InputState.Committing(textToCommit: " ")) stateCallback(InputState.Empty()) } else if currentLM.hasUnigramsFor(key: " ") { - compositor.insertReading(" ") - let textToCommit = commitOverflownCompositionAndWalk + compositor.insertKey(" ") + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) } return true @@ -259,13 +255,13 @@ extension KeyHandler { // MARK: Backspace if input.isBackSpace { - return handleBackSpace(state: state, stateCallback: stateCallback, errorCallback: errorCallback) + return handleBackSpace(state: state, input: input, stateCallback: stateCallback, errorCallback: errorCallback) } // MARK: Delete if input.isDelete || input.emacsKey == EmacsKey.delete { - return handleDelete(state: state, stateCallback: stateCallback, errorCallback: errorCallback) + return handleDelete(state: state, input: input, stateCallback: stateCallback, errorCallback: errorCallback) } // MARK: Enter @@ -286,10 +282,9 @@ extension KeyHandler { if input.isOptionHold { if currentLM.hasUnigramsFor(key: "_punctuation_list") { if composer.isEmpty { - compositor.insertReading("_punctuation_list") - let textToCommit: String! = commitOverflownCompositionAndWalk + compositor.insertKey("_punctuation_list") + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) stateCallback(buildCandidate(state: inputting, isTypingVertical: input.isTypingVertical)) } else { // 不要在注音沒敲完整的情況下叫出統合符號選單。 @@ -334,7 +329,7 @@ extension KeyHandler { let punctuationNamePrefix: String = generatePunctuationNamePrefix(withKeyCondition: input) let parser = currentMandarinParser let arrCustomPunctuations: [String] = [ - punctuationNamePrefix, parser, String(format: "%c", CChar(charCode)), + punctuationNamePrefix, parser, String(format: "%c", charCode.isPrintableASCII ? CChar(charCode) : inputText), ] let customPunctuation: String = arrCustomPunctuations.joined(separator: "") if handlePunctuation( @@ -349,7 +344,9 @@ extension KeyHandler { /// 如果仍無匹配結果的話,看看這個輸入是否是不需要修飾鍵的那種標點鍵輸入。 - let arrPunctuations: [String] = [punctuationNamePrefix, String(format: "%c", CChar(charCode))] + let arrPunctuations: [String] = [ + punctuationNamePrefix, String(format: "%c", charCode.isPrintableASCII ? CChar(charCode) : inputText), + ] let punctuation: String = arrPunctuations.joined(separator: "") if handlePunctuation( @@ -362,20 +359,6 @@ extension KeyHandler { return true } - // 這裡不使用小麥注音 2.2 版的組字區處理方式,而是直接由詞庫負責。 - if input.isUpperCaseASCIILetterKey { - let letter: String! = String(format: "%@%c", "_letter_", CChar(charCode)) - if handlePunctuation( - letter, - state: state, - usingVerticalTyping: input.isTypingVertical, - stateCallback: stateCallback, - errorCallback: errorCallback - ) { - return true - } - } - // MARK: 全形/半形空白 (Full-Width / Half-Width Space) /// 該功能僅可在當前組字區沒有任何內容的時候使用。 @@ -387,6 +370,38 @@ extension KeyHandler { } } + // MARK: 摁住 Shift+字母鍵 的處理 (Shift+Letter Processing) + + // 這裡不使用小麥注音 2.2 版的組字區處理方式,而是直接由詞庫負責。 + if input.isUpperCaseASCIILetterKey, !input.isCommandHold, !input.isControlHold { + if input.isShiftHold { // 這裡先不要判斷 isOptionHold。 + switch mgrPrefs.upperCaseLetterKeyBehavior { + case 1: + stateCallback(InputState.Empty()) + stateCallback(InputState.Committing(textToCommit: inputText.lowercased())) + stateCallback(InputState.Empty()) + return true + case 2: + stateCallback(InputState.Empty()) + stateCallback(InputState.Committing(textToCommit: inputText.uppercased())) + stateCallback(InputState.Empty()) + return true + default: // 包括 case 0,直接塞給組字區。 + let letter: String! = String( + format: "%@%c", "_letter_", charCode.isPrintableASCII ? CChar(charCode) : inputText) + if handlePunctuation( + letter, + state: state, + usingVerticalTyping: input.isTypingVertical, + stateCallback: stateCallback, + errorCallback: errorCallback + ) { + return true + } + } + } + } + // MARK: - 終末處理 (Still Nothing) /// 對剩下的漏網之魚做攔截處理、直接將當前狀態繼續回呼給 ctlInputMethod。 diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index c2e07bc8..755c3f9b 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -22,21 +22,20 @@ extension KeyHandler { /// 「更新內文組字區 (Update the composing buffer)」是指要求客體軟體將組字緩衝區的內容 /// 換成由此處重新生成的組字字串(NSAttributeString,否則會不顯示)。 var tooltipParameterRef: [String] = ["", ""] - let nodeValuesArray: [String] = walkedAnchors.values + let nodeValuesArray: [String] = compositor.walkedNodes.values var composedStringCursorIndex = 0 var readingCursorIndex = 0 /// IMK 協定的內文組字區的游標長度與游標位置無法正確統計 UTF8 高萬字(比如 emoji)的長度, /// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。 /// 這樣就可以免除不必要的類型轉換。 - for theAnchor in walkedAnchors { - let theNode = theAnchor.node - let strNodeValue = theNode.currentPair.value + for theNode in compositor.walkedNodes { + let strNodeValue = theNode.value let arrSplit: [String] = Array(strNodeValue).map { String($0) } let codepointCount = arrSplit.count /// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。 /// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來 /// 累加、以此為依據,來校正「可見游標位置」。 - let spanningLength: Int = theAnchor.spanLength + let spanningLength: Int = theNode.spanLength if readingCursorIndex + spanningLength <= compositor.cursor { composedStringCursorIndex += strNodeValue.utf16.count readingCursorIndex += spanningLength @@ -60,14 +59,14 @@ extension KeyHandler { /// 所以需要上下文工具提示來顯示游標的相對位置。 /// 這裡先計算一下要用在工具提示當中的顯示參數的內容。 switch compositor.cursor { - case compositor.readings.count...: + case compositor.keys.count...: // 這裡的 compositor.cursor 數值不可能大於 readings.count,因為會被 Megrez 自動糾正。 - tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1] + tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1] case 0: - tooltipParameterRef[1] = compositor.readings[compositor.cursor] + tooltipParameterRef[1] = compositor.keys[compositor.cursor] default: - tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1] - tooltipParameterRef[1] = compositor.readings[compositor.cursor] + tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1] + tooltipParameterRef[1] = compositor.keys[compositor.cursor] } } @@ -125,7 +124,7 @@ extension KeyHandler { cursorIndex: currentState.cursorIndex, candidates: getCandidatesArray(fixOrder: mgrPrefs.useFixecCandidateOrderOnSelection), isTypingVertical: isTypingVertical, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) } @@ -177,7 +176,12 @@ extension KeyHandler { // Enter if input.isEnter { if let keyHandlerDelegate = delegate { - if !keyHandlerDelegate.keyHandler(self, didRequestWriteUserPhraseWith: state) { + // 先判斷是否是在摁了降權組合鍵的時候目標不在庫。 + if input.isShiftHold, input.isCommandHold, !state.validToFilter { + IME.prtDebugIntel("2EAC1F7A") + errorCallback() + return true + } else if !keyHandlerDelegate.keyHandler(self, didRequestWriteUserPhraseWith: state, addToFilter: false) { IME.prtDebugIntel("5B69CC8D") errorCallback() return true @@ -187,6 +191,24 @@ extension KeyHandler { return true } + // BackSpace & Delete + if input.isBackSpace || input.isDelete { + if let keyHandlerDelegate = delegate { + if !state.validToFilter { + IME.prtDebugIntel("1F88B191") + errorCallback() + return true + } + if !keyHandlerDelegate.keyHandler(self, didRequestWriteUserPhraseWith: state, addToFilter: true) { + IME.prtDebugIntel("68D3C6C8") + errorCallback() + return true + } + } + stateCallback(buildInputtingState) + return true + } + // Shift + Left if input.isCursorBackward || input.emacsKey == EmacsKey.backward, input.isShiftHold { var index = state.markerIndex @@ -197,7 +219,7 @@ extension KeyHandler { cursorIndex: state.cursorIndex, markerIndex: index, readings: state.readings, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) marking.tooltipForInputting = state.tooltipForInputting stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking) @@ -219,7 +241,7 @@ extension KeyHandler { cursorIndex: state.cursorIndex, markerIndex: index, readings: state.readings, - nodeValuesArray: walkedAnchors.values + nodeValuesArray: compositor.walkedNodes.values ) marking.tooltipForInputting = state.tooltipForInputting stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking) @@ -262,10 +284,9 @@ extension KeyHandler { return true } - compositor.insertReading(customPunctuation) - let textToCommit = commitOverflownCompositionAndWalk + compositor.insertKey(customPunctuation) + walk() let inputting = buildInputtingState - inputting.textToCommit = textToCommit stateCallback(inputting) // 從這一行之後開始,就是針對逐字選字模式的單獨處理。 @@ -276,7 +297,7 @@ extension KeyHandler { isTypingVertical: isTypingVertical ) if candidateState.candidates.count == 1 { - clear() + clear() // 這句不要砍,因為下文可能會回呼 candidateState。 if let candidateToCommit: (String, String) = candidateState.candidates.first, !candidateToCommit.1.isEmpty { stateCallback(InputState.Committing(textToCommit: candidateToCommit.1)) stateCallback(InputState.Empty()) @@ -302,7 +323,6 @@ extension KeyHandler { ) -> Bool { guard let currentState = state as? InputState.Inputting else { return false } - clear() stateCallback(InputState.Committing(textToCommit: currentState.composingBuffer)) stateCallback(InputState.Empty()) return true @@ -321,7 +341,7 @@ extension KeyHandler { ) -> Bool { guard state is InputState.Inputting else { return false } - var composingBuffer = compositor.readings.joined(separator: "-") + var composingBuffer = compositor.keys.joined(separator: "-") if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin { composingBuffer = Tekkon.restoreToneOneInZhuyinKey(target: composingBuffer) // 恢復陰平標記 composingBuffer = Tekkon.cnvPhonaToHanyuPinyin(target: composingBuffer) // 注音轉拼音 @@ -331,8 +351,6 @@ extension KeyHandler { composingBuffer = composingBuffer.replacingOccurrences(of: "-", with: " ") } - clear() - stateCallback(InputState.Committing(textToCommit: composingBuffer)) stateCallback(InputState.Empty()) return true @@ -353,7 +371,7 @@ extension KeyHandler { var composed = "" - for node in walkedAnchors.map(\.node) { + for node in compositor.walkedNodes { var key = node.key if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin { key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記 @@ -364,13 +382,11 @@ extension KeyHandler { key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ") } - let value = node.currentPair.value + let value = node.value // 不要給標點符號等特殊元素加注音 composed += key.contains("_") ? value : "\(value)(\(key))" } - clear() - stateCallback(InputState.Committing(textToCommit: composed)) stateCallback(InputState.Empty()) return true @@ -381,21 +397,29 @@ extension KeyHandler { /// 處理 Backspace (macOS Delete) 按鍵行為。 /// - Parameters: /// - state: 當前狀態。 + /// - input: 輸入按鍵訊號。 /// - stateCallback: 狀態回呼。 /// - errorCallback: 錯誤回呼。 /// - Returns: 將按鍵行為「是否有處理掉」藉由 ctlInputMethod 回報給 IMK。 func handleBackSpace( state: InputStateProtocol, + input: InputSignal, stateCallback: @escaping (InputStateProtocol) -> Void, errorCallback: @escaping () -> Void ) -> Bool { guard state is InputState.Inputting else { return false } + if input.isShiftHold { + stateCallback(InputState.EmptyIgnoringPreviousState()) + stateCallback(InputState.Empty()) + return true + } + if composer.hasToneMarker(withNothingElse: true) { composer.clear() } else if composer.isEmpty { if compositor.cursor > 0 { - compositor.dropReading(direction: .rear) + compositor.dropKey(direction: .rear) walk() } else { IME.prtDebugIntel("9D69908D") @@ -421,32 +445,38 @@ extension KeyHandler { /// 處理 PC Delete (macOS Fn+BackSpace) 按鍵行為。 /// - Parameters: /// - state: 當前狀態。 + /// - input: 輸入按鍵訊號。 /// - stateCallback: 狀態回呼。 /// - errorCallback: 錯誤回呼。 /// - Returns: 將按鍵行為「是否有處理掉」藉由 ctlInputMethod 回報給 IMK。 func handleDelete( state: InputStateProtocol, + input: InputSignal, stateCallback: @escaping (InputStateProtocol) -> Void, errorCallback: @escaping () -> Void ) -> Bool { guard state is InputState.Inputting else { return false } - guard composer.isEmpty else { - IME.prtDebugIntel("9C69908D") - errorCallback() - stateCallback(state) + if input.isShiftHold { + stateCallback(InputState.EmptyIgnoringPreviousState()) + stateCallback(InputState.Empty()) return true } - guard compositor.cursor != compositor.length else { + if compositor.cursor == compositor.length, composer.isEmpty { IME.prtDebugIntel("9B69938D") errorCallback() stateCallback(state) return true } - compositor.dropReading(direction: .front) - walk() + if composer.isEmpty { + compositor.dropKey(direction: .front) + walk() + } else { + composer.clear() + } + let inputting = buildInputtingState // 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。 switch inputting.composingBuffer.isEmpty { @@ -564,7 +594,6 @@ extension KeyHandler { if mgrPrefs.escToCleanInputBuffer { /// 若啟用了該選項,則清空組字器的內容與注拼槽的內容。 /// 此乃 macOS 內建注音輸入法預設之行為,但不太受 Windows 使用者群體之待見。 - clear() stateCallback(InputState.EmptyIgnoringPreviousState()) stateCallback(InputState.Empty()) } else { @@ -614,7 +643,7 @@ extension KeyHandler { composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: nextPosition, - readings: compositor.readings + readings: compositor.keys ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -688,7 +717,7 @@ extension KeyHandler { composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: previousPosition, - readings: compositor.readings + readings: compositor.keys ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -744,7 +773,7 @@ extension KeyHandler { stateCallback: @escaping (InputStateProtocol) -> Void, errorCallback: @escaping () -> Void ) -> Bool { - if composer.isEmpty, compositor.isEmpty || walkedAnchors.isEmpty { return false } + if composer.isEmpty, compositor.isEmpty || compositor.walkedNodes.isEmpty { return false } guard state is InputState.Inputting else { guard state is InputState.Empty else { IME.prtDebugIntel("6044F081") @@ -769,24 +798,27 @@ extension KeyHandler { } var length = 0 - var currentAnchor = Megrez.NodeAnchor() - let cursorIndex = min( - actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length - ) - for anchor in walkedAnchors { - length += anchor.spanLength - if length >= cursorIndex { - currentAnchor = anchor + var currentNode: Megrez.Compositor.Node? + let cursorIndex = actualCandidateCursor + for node in compositor.walkedNodes { + length += node.spanLength + if length > cursorIndex { + currentNode = node break } } - let currentNode = currentAnchor.node - let currentPaired: Megrez.KeyValuePaired = currentNode.currentPair + guard let currentNode = currentNode else { + IME.prtDebugIntel("F58DEA95") + errorCallback() + return true + } + + let currentPaired = (currentNode.key, currentNode.value) var currentIndex = 0 - if currentNode.score < Megrez.Node.kSelectedCandidateScore { - /// 只要是沒有被使用者手動選字過的(節錨下的)節點, + if !currentNode.isOverriden { + /// 如果是沒有被使用者手動選字過的(節錨下的)節點, /// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配 /// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個 /// 注音讀音、卻發現這兩個注音讀音各自的單字權重遠高於由這兩個 @@ -795,14 +827,14 @@ extension KeyHandler { /// (預設情況下是 (Shift+)Tab 來做正 (反) 向切換,但也可以用 /// Shift(+CMD)+Space 或 Alt+↑/↓ 來切換(縱排輸入時則是 Alt+←/→)、 /// 以應對臉書綁架 Tab 鍵的情況。 - if candidates[0].0 == currentPaired.key, candidates[0].1 == currentPaired.value { + if candidates[0] == currentPaired { /// 如果第一個候選字詞是當前節點的候選字詞的值的話, /// 那就切到下一個(或上一個,也就是最後一個)候選字詞。 currentIndex = reverseModifier ? candidates.count - 1 : 1 } } else { for candidate in candidates { - if candidate.0 == currentPaired.key, candidate.1 == currentPaired.value { + if candidate == currentPaired { if reverseModifier { if currentIndex == 0 { currentIndex = candidates.count - 1 diff --git a/Source/Modules/ControllerModules/ctlInputMethod_Core.swift b/Source/Modules/ControllerModules/ctlInputMethod_Core.swift index 1aa3cc3f..e9d60314 100644 --- a/Source/Modules/ControllerModules/ctlInputMethod_Core.swift +++ b/Source/Modules/ControllerModules/ctlInputMethod_Core.swift @@ -21,8 +21,8 @@ import InputMethodKit /// 輸入會話創建一個控制器型別。因此,對於每個輸入會話,都有一個對應的 IMKInputController。 @objc(ctlInputMethod) // 必須加上 ObjC,因為 IMK 是用 ObjC 寫的。 class ctlInputMethod: IMKInputController { - /// 標記狀態來聲明目前是在新增使用者語彙、還是準備要濾除使用者語彙。 - static var areWeDeleting = false + /// 標記狀態來聲明目前新增的詞彙是否需要賦以非常低的權重。 + static var areWeNerfing = false /// 目前在用的的選字窗副本。 static var ctlCandidateCurrent: ctlCandidateProtocol = ctlCandidateUniversal.init(.horizontal) @@ -46,6 +46,9 @@ class ctlInputMethod: IMKInputController { return isASCIIMode } + /// `handle(event:)` 會利用這個參數判定某次 Shift 按鍵是否用來切換中英文輸入。 + private var rencentKeyHandledByKeyHandler = false + // MARK: - 工具函式 /// 指定鍵盤佈局。 @@ -59,7 +62,6 @@ class ctlInputMethod: IMKInputController { /// 將傳回的新狀態交給調度函式。 handle(state: InputState.Committing(textToCommit: state.composingBufferConverted)) } - keyHandler.clear() handle(state: InputState.Empty()) } @@ -93,7 +95,7 @@ class ctlInputMethod: IMKInputController { // 所以這裡添加這句、來試圖應對這種情況。 if keyHandler.delegate == nil { keyHandler.delegate = self } setValue(IME.currentInputMode.rawValue, forTag: 114_514, client: client()) - keyHandler.clear() + keyHandler.clear() // 這句不要砍,因為後面 handle State.Empty() 不一定執行。 keyHandler.ensureParser() if isASCIIMode { @@ -120,7 +122,6 @@ class ctlInputMethod: IMKInputController { /// - Parameter sender: 呼叫了該函式的客體(無須使用)。 override func deactivateServer(_ sender: Any!) { _ = sender // 防止格式整理工具毀掉與此對應的參數。 - keyHandler.clear() handle(state: InputState.Empty()) handle(state: InputState.Deactivated()) } @@ -146,7 +147,7 @@ class ctlInputMethod: IMKInputController { if keyHandler.inputMode != newInputMode { UserDefaults.standard.synchronize() - keyHandler.clear() + keyHandler.clear() // 這句不要砍,因為後面 handle State.Empty() 不一定執行。 keyHandler.inputMode = newInputMode /// 必須加上下述條件,否則會在每次切換至輸入法本體的視窗(比如偏好設定視窗)時會卡死。 /// 這是很多 macOS 副廠輸入法的常見失誤之處。 @@ -188,13 +189,17 @@ class ctlInputMethod: IMKInputController { // 用 Shift 開關半形英數模式。 if ShiftKeyUpChecker.check(event) { - NotifierController.notify( - message: String( - format: "%@%@%@", NSLocalizedString("Alphanumerical Mode", comment: ""), "\n", - toggleASCIIMode() - ? NSLocalizedString("NotificationSwitchON", comment: "") - : NSLocalizedString("NotificationSwitchOFF", comment: "") - )) + if !rencentKeyHandledByKeyHandler { + NotifierController.notify( + message: String( + format: "%@%@%@", NSLocalizedString("Alphanumerical Mode", comment: ""), "\n", + toggleASCIIMode() + ? NSLocalizedString("NotificationSwitchON", comment: "") + : NSLocalizedString("NotificationSwitchOFF", comment: "") + ) + ) + } + rencentKeyHandledByKeyHandler = false return false } @@ -204,8 +209,8 @@ class ctlInputMethod: IMKInputController { /// 否則,每次處理這種判斷時都會觸發 NSInternalInconsistencyException。 if event.type == .flagsChanged { return false } - // 準備修飾鍵,用來判定是否需要利用就地新增語彙時的 Enter 鍵來砍詞。 - ctlInputMethod.areWeDeleting = event.modifierFlags.contains([.shift, .command]) + // 準備修飾鍵,用來判定要新增的詞彙是否需要賦以非常低的權重。 + ctlInputMethod.areWeNerfing = event.modifierFlags.contains([.shift, .command]) var textFrame = NSRect.zero @@ -240,6 +245,7 @@ class ctlInputMethod: IMKInputController { } errorCallback: { clsSFX.beep() } + rencentKeyHandledByKeyHandler = result return result } diff --git a/Source/Modules/ControllerModules/ctlInputMethod_Delegates.swift b/Source/Modules/ControllerModules/ctlInputMethod_Delegates.swift index a8d4a2cc..ead447f3 100644 --- a/Source/Modules/ControllerModules/ctlInputMethod_Delegates.swift +++ b/Source/Modules/ControllerModules/ctlInputMethod_Delegates.swift @@ -22,27 +22,22 @@ extension ctlInputMethod: KeyHandlerDelegate { ctlCandidate(controller, didSelectCandidateAtIndex: index) } - func keyHandler(_ keyHandler: KeyHandler, didRequestWriteUserPhraseWith state: InputStateProtocol) + func keyHandler(_ keyHandler: KeyHandler, didRequestWriteUserPhraseWith state: InputStateProtocol, addToFilter: Bool) -> Bool { - guard let state = state as? InputState.Marking else { - return false - } - if !state.validToWrite { - return false - } + guard let state = state as? InputState.Marking else { return false } let refInputModeReversed: InputMode = (keyHandler.inputMode == InputMode.imeModeCHT) ? InputMode.imeModeCHS : InputMode.imeModeCHT if !mgrLangModel.writeUserPhrase( state.userPhrase, inputMode: keyHandler.inputMode, areWeDuplicating: state.chkIfUserPhraseExists, - areWeDeleting: ctlInputMethod.areWeDeleting + areWeDeleting: addToFilter ) || !mgrLangModel.writeUserPhrase( state.userPhraseConverted, inputMode: refInputModeReversed, areWeDuplicating: false, - areWeDeleting: ctlInputMethod.areWeDeleting + areWeDeleting: addToFilter ) { return false @@ -116,9 +111,7 @@ extension ctlInputMethod: ctlCandidateDelegate { let inputting = keyHandler.buildInputtingState if mgrPrefs.useSCPCTypingMode { - keyHandler.clear() - let composingBuffer = inputting.composingBuffer - handle(state: InputState.Committing(textToCommit: composingBuffer)) + handle(state: InputState.Committing(textToCommit: inputting.composingBuffer)) // 此時是逐字選字模式,所以「selectedValue.1」是單個字、不用追加處理。 if mgrPrefs.associatedPhrasesEnabled, let associatePhrases = keyHandler.buildAssociatePhraseState( diff --git a/Source/Modules/ControllerModules/ctlInputMethod_HandleStates.swift b/Source/Modules/ControllerModules/ctlInputMethod_HandleStates.swift index 8e92cb92..c4fd77ec 100644 --- a/Source/Modules/ControllerModules/ctlInputMethod_HandleStates.swift +++ b/Source/Modules/ControllerModules/ctlInputMethod_HandleStates.swift @@ -131,6 +131,8 @@ extension ctlInputMethod { commit(text: previous.composingBuffer) } clearInlineDisplay() + // 最後一道保險 + keyHandler.clear() } private func handle(state: InputState.Empty, previous: InputStateProtocol) { @@ -147,6 +149,8 @@ extension ctlInputMethod { ctlInputMethod.ctlCandidateCurrent.visible = false ctlInputMethod.tooltipController.hide() clearInlineDisplay() + // 最後一道保險 + keyHandler.clear() } private func handle( @@ -167,6 +171,8 @@ extension ctlInputMethod { commit(text: textToCommit) } clearInlineDisplay() + // 最後一道保險 + keyHandler.clear() } private func handle(state: InputState.Inputting, previous: InputStateProtocol) { diff --git a/Source/Modules/IMEModules/mgrPrefs.swift b/Source/Modules/IMEModules/mgrPrefs.swift index aa08fcb2..4b1e4232 100644 --- a/Source/Modules/IMEModules/mgrPrefs.swift +++ b/Source/Modules/IMEModules/mgrPrefs.swift @@ -48,6 +48,7 @@ public enum UserDef: String, CaseIterable { case kAlsoConfirmAssociatedCandidatesByEnter = "AlsoConfirmAssociatedCandidatesByEnter" case kKeepReadingUponCompositionError = "KeepReadingUponCompositionError" case kTogglingAlphanumericalModeWithLShift = "TogglingAlphanumericalModeWithLShift" + case kUpperCaseLetterKeyBehavior = "UpperCaseLetterKeyBehavior" case kCandidateTextFontName = "CandidateTextFontName" case kCandidateKeyLabelFontName = "CandidateKeyLabelFontName" @@ -70,15 +71,6 @@ private let kDefaultMinKeyLabelSize: CGFloat = 10 private let kMinCandidateListTextSize: CGFloat = 12 private let kMaxCandidateListTextSize: CGFloat = 196 -// default, min and max composing buffer size (in codepoints) -// modern Macs can usually work up to 16 codepoints when the compositor still -// walks the grid with good performance slower Macs (like old PowerBooks) -// will start to sputter beyond 12 such is the algorithmatic complexity -// of the Viterbi algorithm used in the Megrez library (at O(N^2)) -private let kDefaultComposingBufferSize = 20 -private let kMinComposingBufferSize = 10 -private let kMaxComposingBufferSize = 40 - private let kDefaultKeys = "123456789" // MARK: - UserDefaults extension. @@ -137,34 +129,6 @@ struct CandidateListTextSize { } } -@propertyWrapper -struct ComposingBufferSize { - let key: String - let defaultValue: Int = kDefaultComposingBufferSize - lazy var container: UserDefault = .init(key: key, defaultValue: defaultValue) - - var wrappedValue: Int { - mutating get { - let currentValue = container.wrappedValue - if currentValue < kMinComposingBufferSize { - return kMinComposingBufferSize - } else if currentValue > kMaxComposingBufferSize { - return kMaxComposingBufferSize - } - return currentValue - } - set { - var value = newValue - if value < kMinComposingBufferSize { - value = kMinComposingBufferSize - } else if value > kMaxComposingBufferSize { - value = kMaxComposingBufferSize - } - container.wrappedValue = value - } - } -} - // MARK: - enum MandarinParser: Int { @@ -308,6 +272,9 @@ public enum mgrPrefs { UserDefaults.standard.setDefault( mgrPrefs.togglingAlphanumericalModeWithLShift, forKey: UserDef.kTogglingAlphanumericalModeWithLShift.rawValue ) + UserDefaults.standard.setDefault( + mgrPrefs.upperCaseLetterKeyBehavior, forKey: UserDef.kUpperCaseLetterKeyBehavior.rawValue + ) UserDefaults.standard.setDefault(mgrPrefs.usingHotKeySCPC, forKey: UserDef.kUsingHotKeySCPC.rawValue) UserDefaults.standard.setDefault(mgrPrefs.usingHotKeyAssociates, forKey: UserDef.kUsingHotKeyAssociates.rawValue) @@ -388,8 +355,6 @@ public enum mgrPrefs { @UserDefault(key: UserDef.kUseHorizontalCandidateList.rawValue, defaultValue: true) static var useHorizontalCandidateList: Bool - static var composingBufferSize: Int { 30 } - @UserDefault(key: UserDef.kChooseCandidateUsingSpace.rawValue, defaultValue: true) static var chooseCandidateUsingSpace: Bool @@ -408,9 +373,12 @@ public enum mgrPrefs { @UserDefault(key: UserDef.kAlsoConfirmAssociatedCandidatesByEnter.rawValue, defaultValue: true) static var alsoConfirmAssociatedCandidatesByEnter: Bool - @UserDefault(key: UserDef.kAlsoConfirmAssociatedCandidatesByEnter.rawValue, defaultValue: false) + @UserDefault(key: UserDef.kKeepReadingUponCompositionError.rawValue, defaultValue: false) static var keepReadingUponCompositionError: Bool + @UserDefault(key: UserDef.kUpperCaseLetterKeyBehavior.rawValue, defaultValue: 0) + static var upperCaseLetterKeyBehavior: Int + // MARK: - Settings (Tier 2) @UserDefault(key: UserDef.kTogglingAlphanumericalModeWithLShift.rawValue, defaultValue: true) diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 38a9e019..e0c3c681 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -175,14 +175,8 @@ extension vChewing { /// - Parameter key: 給定的讀音字串。 /// - Returns: 對應的經過處理的單元圖陣列。 public func unigramsFor(key: String) -> [Megrez.Unigram] { - if key == " " { - /// 給空格鍵指定輸出值。 - let spaceUnigram = Megrez.Unigram( - keyValue: Megrez.KeyValuePaired(key: " ", value: " "), - score: 0 - ) - return [spaceUnigram] - } + /// 給空格鍵指定輸出值。 + if key == " " { return [.init(value: " ")] } /// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。 var rawAllUnigrams: [Megrez.Unigram] = [] @@ -209,11 +203,11 @@ extension vChewing { rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: key)) // 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。 - var filteredPairs: Set = [] + var filteredPairs: Set = [] // 載入要過濾的 KeyValuePair 清單。 for unigram in lmFiltered.unigramsFor(key: key) { - filteredPairs.insert(unigram.keyValue) + filteredPairs.insert(unigram.value) } return filterAndTransform( @@ -243,9 +237,6 @@ extension vChewing { lmAssociates.hasValuesFor(pair: pair) } - /// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。 - public func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } - // MARK: - 核心函式(對內) /// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。 @@ -255,20 +246,20 @@ extension vChewing { /// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列。 func filterAndTransform( unigrams: [Megrez.Unigram], - filter filteredPairs: Set + filter filteredPairs: Set ) -> [Megrez.Unigram] { var results: [Megrez.Unigram] = [] - var insertedPairs: Set = [] + var insertedPairs: Set = [] for unigram in unigrams { - var pair: Megrez.KeyValuePaired = unigram.keyValue - if filteredPairs.contains(pair) { continue } + var theValue: String = unigram.value + if filteredPairs.contains(theValue) { continue } if isPhraseReplacementEnabled { - let replacement = lmReplacements.valuesFor(key: pair.value) - if !replacement.isEmpty { pair.value = replacement } + let replacement = lmReplacements.valuesFor(key: theValue) + if !replacement.isEmpty { theValue = replacement } } - if insertedPairs.contains(pair) { continue } - results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) - insertedPairs.insert(pair) + if insertedPairs.contains(theValue) { continue } + results.append(Megrez.Unigram(value: theValue, score: unigram.score)) + insertedPairs.insert(theValue) } return results } diff --git a/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift b/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift index e6987ee6..3798d91d 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator_DateTimeExtension.swift @@ -31,9 +31,9 @@ extension vChewing.LMInstantiator { var date3 = ChineseConverter.convertArabicNumeralsToChinese(target: date2) date3 = date3.replacingOccurrences(of: "年〇", with: "年") date3 = date3.replacingOccurrences(of: "月〇", with: "月") - results.append(.init(keyValue: .init(key: key, value: date1), score: -94)) - results.append(.init(keyValue: .init(key: key, value: date2), score: -95)) - results.append(.init(keyValue: .init(key: key, value: date3), score: -96)) + results.append(.init(value: date1, score: -94)) + results.append(.init(value: date2, score: -95)) + results.append(.init(value: date3, score: -96)) if let currentDateShortened = currentDateShortened, delta.year != 0 { var dateAlt1: String = formatterDate1.string(from: currentDateShortened) dateAlt1.regReplace(pattern: #"^0+"#) @@ -42,9 +42,9 @@ extension vChewing.LMInstantiator { var dateAlt3 = ChineseConverter.convertArabicNumeralsToChinese(target: dateAlt2) dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年") dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月") - results.append(.init(keyValue: .init(key: key, value: dateAlt1), score: -97)) - results.append(.init(keyValue: .init(key: key, value: dateAlt2), score: -98)) - results.append(.init(keyValue: .init(key: key, value: dateAlt3), score: -99)) + results.append(.init(value: dateAlt1, score: -97)) + results.append(.init(value: dateAlt2, score: -98)) + results.append(.init(value: dateAlt3, score: -99)) } case "ㄕˊ-ㄐㄧㄢ": let formatterTime1 = DateFormatter() @@ -56,9 +56,9 @@ extension vChewing.LMInstantiator { let time1 = formatterTime1.string(from: currentDate) let time2 = formatterTime2.string(from: currentDate) let time3 = formatterTime3.string(from: currentDate) - results.append(.init(keyValue: .init(key: key, value: time1), score: -97)) - results.append(.init(keyValue: .init(key: key, value: time2), score: -98)) - results.append(.init(keyValue: .init(key: key, value: time3), score: -99)) + results.append(.init(value: time1, score: -97)) + results.append(.init(value: time2, score: -98)) + results.append(.init(value: time3, score: -99)) case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ": let formatterWeek1 = DateFormatter() let formatterWeek2 = DateFormatter() @@ -68,8 +68,8 @@ extension vChewing.LMInstantiator { formatterWeek2.locale = theLocale let week1 = formatterWeek1.string(from: currentDate) let week2 = formatterWeek2.string(from: currentDate) - results.append(.init(keyValue: .init(key: key, value: week1), score: -98)) - results.append(.init(keyValue: .init(key: key, value: week2), score: -99)) + results.append(.init(value: week1, score: -98)) + results.append(.init(value: week2, score: -99)) default: return .init() } return results diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 4556c78b..5879eb95 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -115,18 +115,6 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成雙元圖陣列。 - /// - /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 - /// - parameters: - /// - precedingKey: 前述讀音索引鍵。 - /// - key: 當前讀音索引鍵。 - public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { - // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 - // 其實只要一句「[Megrez.Bigram]()」就夠了。 - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵。 @@ -136,7 +124,6 @@ extension vChewing { for netaRange in arrRangeRecords { let neta = strData[netaRange].split(separator: " ") let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1]) - let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 3, !shouldForceDefaultScore, !neta[2].contains("#") { theScore = .init(String(neta[2])) ?? defaultScore @@ -144,7 +131,7 @@ extension vChewing { if theScore > 0 { theScore *= -1 // 應對可能忘記寫負號的情形 } - grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) + grams.append(Megrez.Unigram(value: theValue, score: theScore)) } } return grams diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift index c513ef21..739df459 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift @@ -108,18 +108,6 @@ extension vChewing { IME.prtDebugIntel(strDump) } - /// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成雙元圖陣列。 - /// - /// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。 - /// - parameters: - /// - precedingKey: 前述讀音索引鍵。 - /// - key: 當前讀音索引鍵。 - public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { - // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 - // 其實只要一句「[Megrez.Bigram]()」就夠了。 - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - /// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。 /// - parameters: /// - key: 讀音索引鍵。 @@ -130,7 +118,6 @@ extension vChewing { let strNetaSet = String(decoding: netaSet, as: UTF8.self) let neta = Array(strNetaSet.split(separator: " ").reversed()) let theValue: String = .init(neta[0]) - let kvPair = Megrez.KeyValuePaired(key: key, value: theValue) var theScore = defaultScore if neta.count >= 2, !shouldForceDefaultScore { theScore = .init(String(neta[1])) ?? defaultScore @@ -138,7 +125,7 @@ extension vChewing { if theScore > 0 { theScore *= -1 // 應對可能忘記寫負號的情形 } - grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) + grams.append(Megrez.Unigram(value: theValue, score: theScore)) } } return grams diff --git a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift index d5efd7bb..c13e4833 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift @@ -26,176 +26,46 @@ extension vChewing { mutDecayExponent = log(0.5) / decayConstant } - public func observe( - walkedAnchors: [Megrez.NodeAnchor], - cursorIndex: Int, - candidate: String, - timestamp: Double, - saveCallback: @escaping () -> Void + public func performObservation( + walkedBefore: [Megrez.Compositor.Node], walkedAfter: [Megrez.Compositor.Node], + cursor: Int, timestamp: Double, saveCallback: @escaping () -> Void ) { - let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) + // 參數合規性檢查。 + guard !walkedAfter.isEmpty, !walkedBefore.isEmpty else { return } + guard walkedBefore.totalReadingsCount == walkedAfter.totalReadingsCount else { return } + // 先判斷用哪種覆寫方法。 + var actualCursor = 0 + guard let currentNode = walkedAfter.findNode(at: cursor, target: &actualCursor) else { return } + // 當前節點超過三個字的話,就不記憶了。在這種情形下,使用者可以考慮新增自訂語彙。 + guard currentNode.spanLength <= 3 else { return } + // 前一個節點得從前一次爬軌結果當中來找。 + guard actualCursor > 0 else { return } // 該情況應該不會出現。 + let currentNodeIndex = actualCursor + actualCursor -= 1 + var prevNodeIndex = 0 + guard let prevNode = walkedBefore.findNode(at: actualCursor, target: &prevNodeIndex) else { return } + + let forceHighScoreOverride: Bool = currentNode.spanLength > prevNode.spanLength + let breakingUp = currentNode.spanLength == 1 && prevNode.spanLength > 1 + + let targetNodeIndex = breakingUp ? currentNodeIndex : prevNodeIndex + let key: String = vChewing.LMUserOverride.formObservationKey( + walkedNodes: walkedAfter, headIndex: targetNodeIndex + ) guard !key.isEmpty else { return } - - guard mutLRUMap[key] != nil else { - var observation: Observation = .init() - observation.update(candidate: candidate, timestamp: timestamp) - let koPair = KeyObservationPair(key: key, observation: observation) - // 先移除 key 再設定 key 的話,就可以影響這個 key 在辭典內的順位。 - // Swift 原生的辭典是沒有數字索引排序的,但資料的插入順序卻有保存著。 - mutLRUMap.removeValue(forKey: key) - mutLRUMap[key] = koPair - mutLRUList.insert(koPair, at: 0) - - if mutLRUList.count > mutCapacity { - mutLRUMap.removeValue(forKey: mutLRUList[mutLRUList.endIndex].key) - mutLRUList.removeLast() - } - IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)") - saveCallback() - return - } - // 降低磁碟寫入次數。唯有失憶的情況下才會更新觀察且記憶。 - if var theNeta = mutLRUMap[key] { - _ = suggest( - walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, timestamp: timestamp, - decayCallback: { - theNeta.observation.update(candidate: candidate, timestamp: timestamp) - self.mutLRUList.insert(theNeta, at: 0) - self.mutLRUMap[key] = theNeta - IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)") - saveCallback() - } - ) - } + doObservation( + key: key, candidate: currentNode.currentUnigram.value, timestamp: timestamp, + forceHighScoreOverride: forceHighScoreOverride, saveCallback: { saveCallback() } + ) } - public func suggest( - walkedAnchors: [Megrez.NodeAnchor], - cursorIndex: Int, - timestamp: Double, - decayCallback: @escaping () -> Void = {} - ) -> [Megrez.Unigram] { - let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex) - guard !key.isEmpty else { - IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.") - return .init() - } - let currentReadingKey = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, readingOnly: true) - guard let koPair = mutLRUMap[key] else { - IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).") - return .init() - } - - let observation = koPair.observation - - var arrResults = [Megrez.Unigram]() - var currentHighScore = 0.0 - for overrideNeta in Array(observation.overrides) { - let override: Override = overrideNeta.value - - let overrideScore: Double = getScore( - eventCount: override.count, - totalCount: observation.count, - eventTimestamp: override.timestamp, - timestamp: timestamp, - lambda: mutDecayExponent - ) - if (0...currentHighScore).contains(overrideScore) { continue } - - let overrideDetectionScore: Double = getScore( - eventCount: override.count, - totalCount: observation.count, - eventTimestamp: override.timestamp, - timestamp: timestamp, - lambda: mutDecayExponent * 2 - ) - if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() } - - let newUnigram = Megrez.Unigram( - keyValue: .init(key: currentReadingKey, value: overrideNeta.key), score: overrideScore - ) - arrResults.insert(newUnigram, at: 0) - currentHighScore = overrideScore - } - if arrResults.isEmpty { - IME.prtDebugIntel("UOM: No usable suggestions in the result for key: \(key).") - } - return arrResults - } - - private func getScore( - eventCount: Int, - totalCount: Int, - eventTimestamp: Double, - timestamp: Double, - lambda: Double - ) -> Double { - let decay = exp((timestamp - eventTimestamp) * lambda) - if decay < kDecayThreshold { return 0.0 } - let prob = Double(eventCount) / Double(totalCount) - return prob * decay - } - - func convertKeyFrom( - walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false - ) -> String { - let whiteList = "你他妳她祢衪它牠再在" - var arrNodes: [Megrez.NodeAnchor] = [] - var intLength = 0 - for theNodeAnchor in walkedAnchors { - arrNodes.append(theNodeAnchor) - intLength += theNodeAnchor.spanLength - if intLength >= cursorIndex { - break - } - } - - if arrNodes.isEmpty { return "" } - - arrNodes = Array(arrNodes.reversed()) - - let kvCurrent = arrNodes[0].node.currentPair - guard !kvCurrent.key.contains("_") else { - return "" - } - - // 字音數與字數不一致的內容會被拋棄。 - if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" } - - // 前置單元只記錄讀音,在其後的單元則同時記錄讀音與字詞 - let strCurrent = kvCurrent.key - var kvPrevious = Megrez.KeyValuePaired() - var kvAnterior = Megrez.KeyValuePaired() - var readingStack = "" - var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" } - var result: String { - // 不要把單個漢字的 kvCurrent 當前鍵值領頭的單元圖記入資料庫,不然對敲字體驗破壞太大。 - if readingStack.contains("_") - || (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value)) - { - return "" - } else { - return (readingOnly ? strCurrent : trigramKey) - } - } - - if arrNodes.count >= 2, - !kvPrevious.key.contains("_"), - kvPrevious.key.split(separator: "-").count == kvPrevious.value.count - { - kvPrevious = arrNodes[1].node.currentPair - readingStack = kvPrevious.key + readingStack - } - - if arrNodes.count >= 3, - !kvAnterior.key.contains("_"), - kvAnterior.key.split(separator: "-").count == kvAnterior.value.count - { - kvAnterior = arrNodes[2].node.currentPair - readingStack = kvAnterior.key + readingStack - } - - return result + public func fetchSuggestion( + currentWalk: [Megrez.Compositor.Node], cursor: Int, timestamp: Double + ) -> Suggestion { + var headIndex = 0 + guard let nodeIter = currentWalk.findNode(at: cursor, target: &headIndex) else { return .init() } + let key = vChewing.LMUserOverride.formObservationKey(walkedNodes: currentWalk, headIndex: headIndex) + return getSuggestion(key: key, timestamp: timestamp, headReading: nodeIter.key) } } } @@ -210,6 +80,7 @@ extension vChewing.LMUserOverride { struct Override: Hashable, Encodable, Decodable { var count: Int = 0 var timestamp: Double = 0.0 + var forceHighScoreOverride = false static func == (lhs: Override, rhs: Override) -> Bool { lhs.count == rhs.count && lhs.timestamp == rhs.timestamp } @@ -244,11 +115,12 @@ extension vChewing.LMUserOverride { hasher.combine(overrides) } - mutating func update(candidate: String, timestamp: Double) { + mutating func update(candidate: String, timestamp: Double, forceHighScoreOverride: Bool = false) { count += 1 if overrides.keys.contains(candidate) { overrides[candidate]?.timestamp = timestamp overrides[candidate]?.count += 1 + overrides[candidate]?.forceHighScoreOverride = forceHighScoreOverride } else { overrides[candidate] = .init(count: 1, timestamp: timestamp) } @@ -333,4 +205,203 @@ extension vChewing.LMUserOverride { return } } + + public struct Suggestion { + var candidates = [(String, Megrez.Unigram)]() + var forceHighScoreOverride = false + var isEmpty: Bool { candidates.isEmpty } + } +} + +// MARK: - Array Extensions. + +extension Array where Element == Megrez.Compositor.Node { + public var totalReadingsCount: Int { + var counter = 0 + for node in self { + counter += node.keyArray.count + } + return counter + } + + public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? { + guard !isEmpty else { return nil } + let cursor = Swift.max(0, Swift.min(cursor, keys.count)) + + if cursor == 0, let theFirst = first { + outCursorPastNode = theFirst.spanLength + return theFirst + } + + // 同時應對「游標在右端」與「游標離右端還差一個位置」的情形。 + if cursor >= keys.count - 1, let theLast = last { + outCursorPastNode = keys.count + return theLast + } + + var accumulated = 0 + for neta in self { + accumulated += neta.spanLength + if accumulated > cursor { + outCursorPastNode = accumulated + return neta + } + } + + // 下述情形本不應該出現。 + return nil + } +} + +// MARK: - Private Methods + +extension vChewing.LMUserOverride { + private func doObservation( + key: String, candidate: String, timestamp: Double, forceHighScoreOverride: Bool, + saveCallback: @escaping () -> Void + ) { + guard mutLRUMap[key] != nil else { + var observation: Observation = .init() + observation.update(candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride) + let koPair = KeyObservationPair(key: key, observation: observation) + // 先移除 key 再設定 key 的話,就可以影響這個 key 在辭典內的順位。 + // Swift 原生的辭典是沒有數字索引排序的,但資料的插入順序卻有保存著。 + mutLRUMap.removeValue(forKey: key) + mutLRUMap[key] = koPair + mutLRUList.insert(koPair, at: 0) + + if mutLRUList.count > mutCapacity { + mutLRUMap.removeValue(forKey: mutLRUList[mutLRUList.endIndex].key) + mutLRUList.removeLast() + } + IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)") + saveCallback() + return + } + // TODO: 降低磁碟寫入次數。唯有失憶的情況下才會更新觀察且記憶。 + if var theNeta = mutLRUMap[key] { + _ = getSuggestion( + key: key, timestamp: timestamp, headReading: "", + decayCallback: { + theNeta.observation.update( + candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride + ) + self.mutLRUList.insert(theNeta, at: 0) + self.mutLRUMap[key] = theNeta + IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)") + saveCallback() + } + ) + } + } + + private func getSuggestion( + key: String, timestamp: Double, headReading: String, decayCallback: @escaping () -> Void = {} + ) -> Suggestion { + guard !key.isEmpty, let kvPair = mutLRUMap[key] else { return .init() } + let observation: Observation = kvPair.observation + var candidates: [(String, Megrez.Unigram)] = .init() + var forceHighScoreOverride = false + var currentHighScore: Double = 0 + for (i, theObservation) in observation.overrides { + let overrideScore = getScore( + eventCount: theObservation.count, totalCount: observation.count, + eventTimestamp: theObservation.timestamp, timestamp: timestamp, lambda: mutDecayExponent + ) + if (0...currentHighScore).contains(overrideScore) { continue } + let overrideDetectionScore: Double = getScore( + eventCount: theObservation.count, totalCount: observation.count, + eventTimestamp: theObservation.timestamp, timestamp: timestamp, lambda: mutDecayExponent * 2 + ) + if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() } + + candidates.append((headReading, .init(value: i, score: overrideScore))) + forceHighScoreOverride = theObservation.forceHighScoreOverride + currentHighScore = overrideScore + } + return .init(candidates: candidates, forceHighScoreOverride: forceHighScoreOverride) + } + + private func getScore( + eventCount: Int, + totalCount: Int, + eventTimestamp: Double, + timestamp: Double, + lambda: Double + ) -> Double { + let decay = exp((timestamp - eventTimestamp) * lambda) + if decay < kDecayThreshold { return 0.0 } + let prob = Double(eventCount) / Double(totalCount) + return prob * decay + } + + private static func isPunctuation(_ node: Megrez.Compositor.Node) -> Bool { + for key in node.keyArray { + guard let firstChar = key.first else { continue } + return String(firstChar) == "_" + } + return false + } + + private static func formObservationKey( + walkedNodes: [Megrez.Compositor.Node], headIndex cursorIndex: Int, readingOnly: Bool = false + ) -> String { + let whiteList = "你他妳她祢衪它牠再在" + var arrNodes: [Megrez.Compositor.Node] = [] + var intLength = 0 + for theNodeAnchor in walkedNodes { + arrNodes.append(theNodeAnchor) + intLength += theNodeAnchor.spanLength + if intLength >= cursorIndex { + break + } + } + + if arrNodes.isEmpty { return "" } + + arrNodes = Array(arrNodes.reversed()) + + let kvCurrent = arrNodes[0].currentPair + guard !kvCurrent.key.contains("_") else { + return "" + } + + // 字音數與字數不一致的內容會被拋棄。 + if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" } + + // 前置單元只記錄讀音,在其後的單元則同時記錄讀音與字詞 + let strCurrent = kvCurrent.key + var kvPrevious = Megrez.KeyValuePaired() + var kvAnterior = Megrez.KeyValuePaired() + var readingStack = "" + var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" } + var result: String { + // 不要把單個漢字的 kvCurrent 當前鍵值領頭的單元圖記入資料庫,不然對敲字體驗破壞太大。 + if readingStack.contains("_") + || (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value)) + { + return "" + } else { + return (readingOnly ? strCurrent : trigramKey) + } + } + + if arrNodes.count >= 2, + !kvPrevious.key.contains("_"), + kvPrevious.key.split(separator: "-").count == kvPrevious.value.count + { + kvPrevious = arrNodes[1].currentPair + readingStack = kvPrevious.key + readingStack + } + + if arrNodes.count >= 3, + !kvAnterior.key.contains("_"), + kvAnterior.key.split(separator: "-").count == kvAnterior.value.count + { + kvAnterior = arrNodes[2].currentPair + readingStack = kvAnterior.key + readingStack + } + + return result + } } diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 6815cf7d..3fc48028 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -184,7 +184,7 @@ enum mgrLangModel { (mode == InputMode.imeModeCHT) ? gLangModelCHT.unigramsFor(key: unigramKey) : gLangModelCHS.unigramsFor(key: unigramKey) for unigram in unigrams { - if unigram.keyValue.value == userPhrase { + if unigram.value == userPhrase { return true } } diff --git a/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift index ee2ecea9..3bdc535c 100644 --- a/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift +++ b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift @@ -1,11 +1,21 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // ==================== // This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. /// The namespace for this package. -public enum Megrez {} +public enum Megrez { + public typealias KeyValuePaired = Compositor.Candidate // 相容性措施。 +} + +// 著作權聲明: +// 除了 Megrez 專有的修改與實作以外,該套件所有程式邏輯來自於 Gramambular、算法歸 Lukhnos Liu 所有。 +// 天權星引擎(Megrez Compositor)僅僅是將 Gramambular 用 Swift 重寫之後繼續開發的結果而已。 + +// 術語: + +// Grid: 節軌 +// Walk: 爬軌 +// Node: 節點 +// SpanLength: 節幅 +// Span: 幅位 diff --git a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift index fa217383..9d0be506 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift @@ -1,41 +1,91 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // ==================== // This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. extension Megrez { - /// 組字器。 - public class Compositor: Grid { + /// 一個組字器用來在給定一系列的索引鍵的情況下(藉由一系列的觀測行為)返回一套資料值。 + /// + /// 用於輸入法的話,給定的索引鍵可以是注音、且返回的資料值都是漢語字詞組合。該組字器 + /// 還可以用來對文章做分節處理:此時的索引鍵為漢字,返回的資料值則是漢語字詞分節組合。 + /// + /// - Remark: 雖然這裡用了隱性 Markov 模型(HMM)的術語,但實際上在爬軌時用到的則是更 + /// 簡單的貝氏推論:因為底層的語言模組只會提供單元圖資料。一旦將所有可以組字的單元圖 + /// 作為節點塞到組字器內,就可以用一個簡單的有向無環圖爬軌過程、來利用這些隱性資料值 + /// 算出最大相似估算結果。 + public class Compositor { /// 就文字輸入方向而言的方向。 public enum TypingDirection { case front, rear } - /// 給被丟掉的節點路徑施加的負權重。 - private let kDroppedPathScore: Double = -999 + /// 軌格增減行為。 + public enum ResizeBehavior { case expand, shrink } + /// 該軌格內可以允許的最大幅位長度。 + public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } } + /// 公開:多字讀音鍵當中用以分割漢字讀音的記號的預設值,是「-」。 + public static let kDefaultSeparator: String = "-" /// 該組字器的游標位置。 - public var cursor: Int = 0 { didSet { cursor = max(0, min(cursor, readings.count)) } } - /// 該組字器的讀音陣列。 - private(set) var readings: [String] = [] - /// 該組字器所使用的語言模型。 - private var langModel: LangModelProtocol + public var cursor: Int = 0 { didSet { cursor = max(0, min(cursor, length)) } } + /// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。 + public var separator = kDefaultSeparator + /// 公開:組字器內已經插入的單筆索引鍵的數量。 + public var width: Int { keys.count } + /// 公開:最近一次爬軌結果。 + public var walkedNodes: [Node] = [] + /// 公開:該組字器的長度,也就是內建漢字讀音的數量(唯讀)。 + public var length: Int { keys.count } + /// 公開:組字器是否為空。 + public var isEmpty: Bool { spans.isEmpty && keys.isEmpty } + + /// 該組字器的索引鍵陣列。 + private(set) var keys = [String]() + /// 該組字器的幅位陣列。 + private(set) var spans = [Span]() + /// 該組字器所使用的語言模型(被 LangModelRanked 所封裝)。 + private(set) var langModel: LangModelRanked /// 允許查詢當前游標位置屬於第幾個幅位座標(從 0 開始算)。 private(set) var cursorRegionMap: [Int: Int] = .init() - /// 用以記錄爬過的節錨的陣列。 - private(set) var walkedAnchors: [NodeAnchor] = [] - /// 該函式用以更新爬過的節錨的陣列。 - /// - Parameter nodes: 傳入的節點陣列。 - public func updateWalkedAnchors(with nodes: [Node]) { - walkedAnchors = nodes.map { Megrez.NodeAnchor(node: $0) } + /// 初期化一個組字器。 + /// - Parameter langModel: 要對接的語言模組。 + public init(with langModel: LangModelProtocol, separator: String = "-") { + self.langModel = .init(withLM: langModel) + self.separator = separator } - /// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。 - public var joinSeparator: String = "-" + public func clear() { + cursor = 0 + keys.removeAll() + spans.removeAll() + walkedNodes.removeAll() + cursorRegionMap.removeAll() + } - /// 公開:該組字器的長度,也就是內建漢字讀音的數量(唯讀)。 - public var length: Int { readings.count } + /// 在游標位置插入給定的索引鍵。 + /// - Parameter key: 要插入的索引鍵。 + /// - Returns: 該操作是否成功執行。 + @discardableResult public func insertKey(_ key: String) -> Bool { + guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(key: key) else { return false } + keys.insert(key, at: cursor) + resizeGrid(at: cursor, do: .expand) + update() + cursor += 1 // 游標必須得在執行 update() 之後才可以變動。 + return true + } + + /// 朝著指定方向砍掉一個與游標相鄰的讀音。 + /// + /// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear),反之則為向前(Front)。 + /// 如果是朝著與文字輸入方向相反的方向砍的話,游標位置會自動遞減。 + /// - Parameter direction: 指定方向(相對於文字輸入方向而言)。 + /// - Returns: 該操作是否成功執行。 + @discardableResult public func dropKey(direction: TypingDirection) -> Bool { + let isBackSpace: Bool = direction == .rear ? true : false + guard cursor != (isBackSpace ? 0 : keys.count) else { return false } + keys.remove(at: cursor - (isBackSpace ? 1 : 0)) + cursor -= isBackSpace ? 1 : 0 // 在縮節之前。 + resizeGrid(at: cursor, do: .shrink) + update() + return true + } /// 按幅位來前後移動游標。 /// - Parameter direction: 移動方向。 @@ -50,21 +100,21 @@ extension Megrez { guard let currentRegion = cursorRegionMap[cursor] else { return false } let aRegionForward = max(currentRegion - 1, 0) - let currentRegionBorderRear: Int = walkedAnchors[0.. walkedAnchors.count) - ? readings.count : walkedAnchors[0...currentRegion].map(\.spanLength).reduce(0, +) + (currentRegion > walkedNodes.count) + ? keys.count : walkedNodes[0...currentRegion].map(\.spanLength).reduce(0, +) case .rear: - cursor = walkedAnchors[0.. Bool { - guard !reading.isEmpty, langModel.hasUnigramsFor(key: reading) else { return false } - readings.insert(reading, at: cursor) - resizeGridByOneAt(location: cursor, to: .expand) - build() - cursor += 1 - return true - } - - /// 朝著指定方向砍掉一個與游標相鄰的讀音。 - /// - /// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear),反之則為向前(Front)。 - /// - Parameter direction: 指定方向。 - /// - Returns: 該操作是否順利完成。 - @discardableResult public func dropReading(direction: TypingDirection) -> Bool { - let isBackSpace = direction == .rear - if cursor == (isBackSpace ? 0 : readings.count) { - return false - } - readings.remove(at: cursor - (isBackSpace ? 1 : 0)) - cursor -= (isBackSpace ? 1 : 0) - resizeGridByOneAt(location: cursor, to: .shrink) - build() - return true - } - - /// 移除該組字器最先被輸入的第 X 個讀音單元。 - /// - /// 用於輸入法組字區長度上限處理: - /// 將該位置要溢出的敲字內容遞交之後、再執行這個函式。 - @discardableResult public func removeHeadReadings(count: Int) -> Bool { - let count = abs(count) // 防呆 - if count > length { return false } - for _ in 0.. [NodeAnchor] { - let newLocation = width - // 這裡把所有空節點都過濾掉。 - walkedAnchors = Array( - reverseWalk(at: newLocation).reversed() - ).lazy.filter { !$0.isEmpty } - updateCursorJumpingTables(walkedAnchors) - return walkedAnchors - } - - // MARK: - Private functions - - /// 內部專用反芻函式,對已給定的軌格按照給定的位置與條件進行反向爬軌。 - /// - Parameters: - /// - location: 開始爬軌的位置。 - /// - mass: 給定累計權重,非必填參數。預設值為 0。 - /// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。 - /// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。 - /// - Returns: 一個包含結果的節錨陣列。 - private func reverseWalk( - at location: Int, - mass: Double = 0.0, - joinedPhrase: String = "", - longPhrases: [String] = .init() - ) -> [NodeAnchor] { - let location = abs(location) // 防呆 - if location == 0 || location > width { - return .init() - } - - var paths = [[NodeAnchor]]() - let nodes = nodesEndingAt(location: location).stableSorted { - $0.node.score > $1.node.score - } - - guard !nodes.isEmpty else { return .init() } // 防止下文出現範圍外索引的錯誤 - - if nodes[0].node.score >= Node.kSelectedCandidateScore { - // 在使用者有選過候選字詞的情況下,摒棄非依此據而成的節點路徑。 - var theAnchor = nodes[0] - theAnchor.mass = mass + nodes[0].node.score - var path: [NodeAnchor] = reverseWalk( - at: location - theAnchor.spanLength, mass: theAnchor.mass - ) - path.insert(theAnchor, at: 0) - paths.append(path) - } else if !longPhrases.isEmpty { - var path = [NodeAnchor]() - for theAnchor in nodes { - var theAnchor = theAnchor - let joinedValue = theAnchor.node.currentPair.value + joinedPhrase - // 如果只是一堆單漢字的節點組成了同樣的長詞的話,直接棄用這個節點路徑。 - // 打比方說「八/月/中/秋/山/林/涼」與「八月/中秋/山林/涼」在使用者來看 - // 是「結果等價」的,那就扔掉前者。 - if longPhrases.contains(joinedValue) { - theAnchor.mass = kDroppedPathScore - path.insert(theAnchor, at: 0) - paths.append(path) - continue + /// 生成用以交給 GraphViz 診斷的資料檔案內容,純文字。 + public var dumpDOT: String { + var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n" + for (p, span) in spans.enumerated() { + for ni in 0...(span.maxLength) { + guard let np = span.nodeOf(length: ni) else { continue } + if p == 0 { + strOutput += "BOS -> \(np.value);\n" } - theAnchor.mass = mass + theAnchor.node.score - path = reverseWalk( - at: location - theAnchor.spanLength, - mass: theAnchor.mass, - joinedPhrase: (joinedValue.count >= longPhrases[0].count) ? "" : joinedValue, - longPhrases: .init() - ) - path.insert(theAnchor, at: 0) - paths.append(path) - } - } else { - // 看看當前格位有沒有更長的候選字詞。 - var longPhrases = [String]() - for theAnchor in nodes.lazy.filter({ $0.spanLength > 1 }) { - longPhrases.append(theAnchor.node.currentPair.value) - } - - longPhrases = longPhrases.stableSorted { - $0.count > $1.count - } - for theAnchor in nodes { - var theAnchor = theAnchor - theAnchor.mass = mass + theAnchor.node.score - var path = [NodeAnchor]() - path = reverseWalk( - at: location - theAnchor.spanLength, mass: theAnchor.mass, - joinedPhrase: (theAnchor.spanLength > 1) ? "" : theAnchor.node.currentPair.value, - longPhrases: .init() - ) - path.insert(theAnchor, at: 0) - paths.append(path) + strOutput += "\(np.value);\n" + if (p + ni) < spans.count { + let destinationSpan = spans[p + ni] + for q in 0...(destinationSpan.maxLength) { + guard let dn = destinationSpan.nodeOf(length: q) else { continue } + strOutput += np.value + " -> " + dn.value + ";\n" + } + } + guard (p + ni) == spans.count else { continue } + strOutput += np.value + " -> EOS;\n" } } - - guard !paths.isEmpty else { - return .init() - } - - var result: [NodeAnchor] = paths[0] - for neta in paths.lazy.filter({ - $0.last!.mass > result.last!.mass - }) { - result = neta - } - - return result // 空節點過濾的步驟交給 walk() 這個對外函式,以避免重複執行清理步驟。 - } - - private func build() { - let itrBegin: Int = - (cursor < maxBuildSpanLength) ? 0 : cursor - maxBuildSpanLength - let itrEnd: Int = min(cursor + maxBuildSpanLength, readings.count) - - for p in itrBegin.. itrEnd { break } - let arrSlice = readings[p..<(p + q)] - let combinedReading: String = join(slice: arrSlice, separator: joinSeparator) - if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue } - let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading) - if unigrams.isEmpty { continue } - let n: Node = .init(key: combinedReading, spanLength: q, unigrams: unigrams) - insertNode(node: n, location: p, spanLength: q) - } - } - } - - private func join(slice arrSlice: ArraySlice, separator: String) -> String { - arrSlice.joined(separator: separator) - } - - internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) { - var cursorRegionMapDict = [Int: Int]() - cursorRegionMapDict[-1] = 0 // 防呆 - var counter = 0 - for (i, anchor) in anchors.enumerated() { - for _ in 0.. Bool - ) - rethrows -> [Element] - { - try enumerated() - .sorted { a, b -> Bool in - try areInIncreasingOrder(a.element, b.element) - || (a.offset < b.offset && !areInIncreasingOrder(b.element, a.element)) + /// 拿新增幅位來打比方的話,在擴增幅位之前: + /// ``` + /// Span Index 0 1 2 3 + /// (---) + /// (-------) + /// (-----------) + /// ``` + /// 在幅位座標 2 (SpanIndex = 2) 的位置擴增一個幅位之後: + /// ``` + /// Span Index 0 1 2 3 4 + /// (---) + /// (XXX? ?XXX) <-被扯爛的節點 + /// (XXXXXXX? ?XXX) <-被扯爛的節點 + /// ``` + /// 拿縮減幅位來打比方的話,在縮減幅位之前: + /// ``` + /// Span Index 0 1 2 3 + /// (---) + /// (-------) + /// (-----------) + /// ``` + /// 在幅位座標 2 的位置就地砍掉一個幅位之後: + /// ``` + /// Span Index 0 1 2 3 4 + /// (---) + /// (XXX? <-被砍爛的節點 + /// (XXXXXXX? <-被砍爛的節點 + /// ``` + /// - Parameter location: 給定的幅位座標。 + func dropWreckedNodes(at location: Int) { + let location = max(min(location, spans.count), 0) // 防呆 + guard !spans.isEmpty else { return } + let affectedLength = Megrez.Compositor.maxSpanLength - 1 + let begin = max(0, location - affectedLength) + guard location >= begin else { return } + for i in begin.. Bool { + let location = max(min(location, spans.count - 1), 0) // 防呆 + spans[location].append(node: node) + return true + } + + func getJointKey(range: Range) -> String { + // 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。 + guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return "" } + return keys[range].joined(separator: separator) + } + + func getJointKeyArray(range: Range) -> [String] { + // 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。 + guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] } + return keys[range].map { String($0) } + } + + func hasNode(at location: Int, length: Int, key: String) -> Bool { + let location = max(min(location, spans.count), 0) // 防呆 + guard let node = spans[location].nodeOf(length: length) else { return false } + return key == node.key + } + + func update() { + let maxSpanLength = Megrez.Compositor.maxSpanLength + let range = max(0, cursor - maxSpanLength)..= spans.count { - let diff = location - spans.count + 1 - for _ in 0.. Bool { - let location = abs(location) // 防呆 - let spanLength = abs(spanLength) // 防呆 - if location > spans.count { - return false - } - - let n = spans[location].nodeOf(length: spanLength) - return n != nil && key == n?.key - } - - /// 在該軌格的指定位置擴增或減少一個幅位。 - /// - Parameters: - /// - location: 位置。 - public func resizeGridByOneAt(location: Int, to behavior: ResizeBehavior) { - let location = max(0, min(width, location)) // 防呆 - switch behavior { - case .expand: - spans.insert(SpanUnit(), at: location) - if [spans.count, 0].contains(location) { return } - case .shrink: - if location >= spans.count { return } - spans.remove(at: location) - } - for i in 0.. [NodeAnchor] { - let location = abs(location) // 防呆 - var results = [NodeAnchor]() - if location >= spans.count { return results } - // 此時 spans 必然不為空,因為 location 不可能小於 0。 - let span = spans[location] - for i in 1...maxBuildSpanLength { - if let np = span.nodeOf(length: i) { - results.append(.init(node: np)) - } - } - return results // 已證實不會有空節點產生。 - } - - /// 給定位置,枚舉出所有在這個位置結尾的節點。 - /// - Parameters: - /// - location: 位置。 - public func nodesEndingAt(location: Int) -> [NodeAnchor] { - let location = abs(location) // 防呆 - var results = [NodeAnchor]() - if spans.isEmpty || location > spans.count { return results } - for i in 0.. [NodeAnchor] { - let location = abs(location) // 防呆 - var results = [NodeAnchor]() - if spans.isEmpty || location > spans.count { return results } - for i in 0.. [NodeAnchor] { - Array(Set(nodesBeginningAt(location: location) + nodesCrossingOrEndingAt(location: location))) - } - - /// 使用給定的候選字字串,將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。 - /// - /// 該函式可以僅用作過程函式,但準確度不如用於處理候選字鍵值配對的 fixNodeWithCandidate()。 - /// - Parameters: - /// - location: 位置。 - /// - value: 給定字串。 - @discardableResult public func fixNodeWithCandidateLiteral(_ value: String, at location: Int) -> NodeAnchor { - let location = abs(location) // 防呆 - var node = NodeAnchor() - for theAnchor in nodesCrossingOrEndingAt(location: location) { - let candidates = theAnchor.node.candidates - // 將該位置的所有節點的候選字詞鎖定狀態全部重設。 - theAnchor.node.resetCandidate() - for (i, candidate) in candidates.enumerated() { - if candidate.value == value { - theAnchor.node.selectCandidateAt(index: i) - node = theAnchor - break - } - } - } - return node - } - - /// 使用給定的候選字鍵值配對,將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。 - /// - /// 該函式可以僅用作過程函式。 - /// - Parameters: - /// - location: 位置。 - /// - value: 給定候選字鍵值配對。 - @discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor { - let location = abs(location) // 防呆 - var node = NodeAnchor() - for theAnchor in nodesCrossingOrEndingAt(location: location) { - let candidates = theAnchor.node.candidates - // 將該位置的所有節點的候選字詞鎖定狀態全部重設。 - theAnchor.node.resetCandidate() - for (i, candidate) in candidates.enumerated() { - if candidate == pair { - theAnchor.node.selectCandidateAt(index: i) - node = theAnchor - break - } - } - } - return node - } - - /// 將給定位置的節點的與給定的字串一致的候選字詞的權重複寫為給定權重數值。 - /// - Parameters: - /// - location: 位置。 - /// - value: 給定字串。 - /// - overridingScore: 給定權重數值。 - public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) { - let location = abs(location) // 防呆 - for theAnchor in nodesOverlappedAt(location: location) { - let candidates = theAnchor.node.candidates - // 將該位置的所有節點的候選字詞鎖定狀態全部重設。 - theAnchor.node.resetCandidate() - for (i, candidate) in candidates.enumerated() { - if candidate.value == value { - theAnchor.node.selectFloatingCandidateAt(index: i, score: overridingScore) - break - } - } - } - } - } -} - -// MARK: - DumpDOT-related functions. - -extension Megrez.Grid { - /// 生成用以交給 GraphViz 診斷的資料檔案內容,純文字。 - public var dumpDOT: String { - var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n" - for (p, span) in spans.enumerated() { - for ni in 0...(span.maxLength) { - guard let np = span.nodeOf(length: ni) else { continue } - if p == 0 { - strOutput += "BOS -> \(np.currentPair.value);\n" - } - strOutput += "\(np.currentPair.value);\n" - if (p + ni) < spans.count { - let destinationSpan = spans[p + ni] - for q in 0...(destinationSpan.maxLength) { - guard let dn = destinationSpan.nodeOf(length: q) else { continue } - strOutput += np.currentPair.value + " -> " + dn.currentPair.value + ";\n" - } - } - guard (p + ni) == spans.count else { continue } - strOutput += np.currentPair.value + " -> EOS;\n" - } - } - strOutput += "EOS;\n}\n" - return strOutput - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/2_Walker.swift b/Source/Modules/LanguageParsers/Megrez/2_Walker.swift new file mode 100644 index 00000000..b7f4b091 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/2_Walker.swift @@ -0,0 +1,107 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +extension Megrez.Compositor { + /// 找到軌格陣圖內權重最大的路徑。該路徑代表了可被觀測到的最可能的隱藏事件鏈。 + /// 這裡使用 Cormen 在 2001 年出版的教材當中提出的「有向無環圖的最短路徑」的 + /// 算法來計算這種路徑。不過,這裡不是要計算距離最短的路徑,而是計算距離最長 + /// 的路徑(所以要找最大的權重),因為在對數概率下,較大的數值意味著較大的概率。 + /// 對於 `G = (V, E)`,該算法的運行次數為 `O(|V|+|E|)`,其中 `G` 是一個有向無環圖。 + /// 這意味著,即使軌格很大,也可以用很少的算力就可以爬軌。 + /// - Returns: 爬軌結果+該過程是否順利執行。 + @discardableResult public func walk() -> ([Node], Bool) { + var result = [Node]() + defer { + walkedNodes = result + updateCursorJumpingTables(walkedNodes) + } + guard !spans.isEmpty else { return (result, true) } + + var vertexSpans = [VertexSpan]() + for _ in spans { + vertexSpans.append(.init()) + } + + for (i, span) in spans.enumerated() { + for j in 1...span.maxLength { + if let p = span.nodeOf(length: j) { + vertexSpans[i].append(.init(node: p)) + } + } + } + + let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"], keySeparator: separator)) + + for (i, vertexSpan) in vertexSpans.enumerated() { + for vertex in vertexSpan { + let nextVertexPosition = i + vertex.node.spanLength + if nextVertexPosition == vertexSpans.count { + vertex.edges.append(terminal) + continue + } + for nextVertex in vertexSpans[nextVertexPosition] { + vertex.edges.append(nextVertex) + } + } + } + + let root = Vertex(node: .init(keyArray: ["_ROOT_"], keySeparator: separator)) + root.distance = 0 + root.edges.append(contentsOf: vertexSpans[0]) + + var ordered: [Vertex] = topologicalSort(root: root) + for (j, neta) in ordered.reversed().enumerated() { + for (k, _) in neta.edges.enumerated() { + relax(u: neta, v: &neta.edges[k]) + } + ordered[j] = neta + } + + var walked = [Node]() + var totalKeyLength = 0 + var it = terminal + while let itPrev = it.prev { + walked.append(itPrev.node) + it = itPrev + totalKeyLength += it.node.spanLength + } + + guard totalKeyLength == keys.count else { + print("!!! ERROR A") + return (result, false) + } + guard walked.count >= 2 else { + print("!!! ERROR B") + return (result, false) + } + walked = walked.reversed() + walked.removeFirst() + result = walked + return (result, true) + } +} + +// MARK: - Stable Sort Extension + +// Reference: https://stackoverflow.com/a/50545761/4162914 + +extension Sequence { + /// Return a stable-sorted collection. + /// + /// - Parameter areInIncreasingOrder: Return nil when two element are equal. + /// - Returns: The sorted collection. + fileprivate func stableSorted( + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) + rethrows -> [Element] + { + try enumerated() + .sorted { a, b -> Bool in + try areInIncreasingOrder(a.element, b.element) + || (a.offset < b.offset && !areInIncreasingOrder(b.element, a.element)) + } + .map(\.element) + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/3_Candidate.swift b/Source/Modules/LanguageParsers/Megrez/3_Candidate.swift new file mode 100644 index 00000000..ddca9506 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/3_Candidate.swift @@ -0,0 +1,181 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +import Foundation + +extension Megrez.Compositor { + public struct Candidate: Equatable, Hashable, Comparable, CustomStringConvertible { + /// 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 + public var key: String + /// 資料值。 + public var value: String + /// 將當前鍵值列印成一個字串。 + public var description: String { "(" + key + "," + value + ")" } + /// 判斷當前鍵值配對是否合規。如果鍵與值有任一為空,則結果為 false。 + public var isValid: Bool { !key.isEmpty && !value.isEmpty } + /// 將當前鍵值列印成一個字串,但如果該鍵值配對為空的話則僅列印「()」。 + public var toNGramKey: String { !isValid ? "()" : "(" + key + "," + value + ")" } + + /// 初期化一組鍵值配對。 + /// - Parameters: + /// - key: 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 + /// - value: 資料值。 + public init(key: String = "", value: String = "") { + self.key = key + self.value = value + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(key) + hasher.combine(value) + } + + public static func == (lhs: Candidate, rhs: Candidate) -> Bool { + lhs.key == rhs.key && lhs.value == rhs.value + } + + public static func < (lhs: Candidate, rhs: Candidate) -> Bool { + (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value) + } + + public static func > (lhs: Candidate, rhs: Candidate) -> Bool { + (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value) + } + + public static func <= (lhs: Candidate, rhs: Candidate) -> Bool { + (lhs.key.count <= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value) + } + + public static func >= (lhs: Candidate, rhs: Candidate) -> Bool { + (lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value) + } + } + + public enum CandidateFetchFilter { case all, beginAt, endAt } + + /// 返回在當前位置的所有候選字詞(以詞音配對的形式)。如果組字器內有幅位、且游標 + /// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的 + /// 話,那麼這裡會用到 location - 1、以免去在呼叫該函數後再處理的麻煩。 + /// - Parameter location: 游標位置。 + /// - Returns: 候選字音配對陣列。 + public func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Candidate] { + var result = [Candidate]() + guard !keys.isEmpty else { return result } + let location = max(min(location, keys.count - 1), 0) // 防呆 + let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted { + // 按照讀音的長度來給節點排序。 + $0.spanLength > $1.spanLength + } + let keyAtCursor = keys[location] + for theNode in anchors.map(\.node) { + if theNode.key.isEmpty { continue } + for gram in theNode.unigrams { + switch filter { + case .all: + // 得加上這道篩選,所以會出現很多無效結果。 + if !theNode.keyArray.contains(keyAtCursor) { continue } + case .beginAt: + if theNode.keyArray[0] != keyAtCursor { continue } + case .endAt: + if theNode.keyArray.reversed()[0] != keyAtCursor { continue } + } + result.append(.init(key: theNode.key, value: gram.value)) + } + } + return result + } + + /// 使用給定的候選字(詞音配對),將給定位置的節點的候選字詞改為與之一致的候選字詞。 + /// + /// 該函式可以僅用作過程函式。 + /// - Parameters: + /// - candidate: 指定用來覆寫為的候選字(詞音配對)。 + /// - location: 游標位置。 + /// - overrideType: 指定覆寫行為。 + /// - Returns: 該操作是否成功執行。 + @discardableResult public func overrideCandidate( + _ candidate: Candidate, at location: Int, overrideType: Node.OverrideType = .withHighScore + ) + -> Bool + { + overrideCandidateAgainst(key: candidate.key, at: location, value: candidate.value, type: overrideType) + } + + /// 使用給定的候選字詞字串,將給定位置的節點的候選字詞改為與之一致的候選字詞。 + /// + /// 注意:如果有多個「單元圖資料值雷同、卻讀音不同」的節點的話,該函數的行為結果不可控。 + /// - Parameters: + /// - candidate: 指定用來覆寫為的候選字(字串)。 + /// - location: 游標位置。 + /// - overrideType: 指定覆寫行為。 + /// - Returns: 該操作是否成功執行。 + @discardableResult public func overrideCandidateLiteral( + _ candidate: String, + at location: Int, overrideType: Node.OverrideType = .withHighScore + ) -> Bool { + overrideCandidateAgainst(key: nil, at: location, value: candidate, type: overrideType) + } + + // MARK: Internal implementations. + + /// 使用給定的候選字(詞音配對)、或給定的候選字詞字串,將給定位置的節點的候選字詞改為與之一致的候選字詞。 + /// - Parameters: + /// - key: 索引鍵,也就是詞音配對當中的讀音。 + /// - location: 游標位置。 + /// - value: 資料值。 + /// - type: 指定覆寫行為。 + /// - Returns: 該操作是否成功執行。 + internal func overrideCandidateAgainst(key: String?, at location: Int, value: String, type: Node.OverrideType) + -> Bool + { + let location = max(min(location, keys.count), 0) // 防呆 + var arrOverlappedNodes: [NodeAnchor] = fetchOverlappingNodes(at: min(keys.count - 1, location)) + var overridden: NodeAnchor? + for anchor in arrOverlappedNodes { + if let key = key, anchor.node.key != key { continue } + if anchor.node.selectOverrideUnigram(value: value, type: type) { + overridden = anchor + break + } + } + + guard let overridden = overridden else { return false } // 啥也不覆寫。 + + for i in overridden.spanIndex.. Bool + ) + rethrows -> [Element] + { + try enumerated() + .sorted { a, b -> Bool in + try areInIncreasingOrder(a.element, b.element) + || (a.offset < b.offset && !areInIncreasingOrder(b.element, a.element)) + } + .map(\.element) + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift deleted file mode 100644 index 72a0ff08..00000000 --- a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift +++ /dev/null @@ -1,78 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 節锚。 - @frozen public struct NodeAnchor: Hashable { - /// 用來判斷該節錨是否為空。 - public var isEmpty: Bool { node.key.isEmpty } - /// 節點。一個節锚內不一定有節點。 - public var node: Node = .init() - /// 指定的幅位長度。 - public var spanLength: Int { node.spanLength } - /// 獲取用來比較的權重。 - public var scoreForSort: Double { node.score } - /// 累計權重。 - public var mass: Double = 0.0 - /// 單元圖陣列。 - public var unigrams: [Unigram] { node.unigrams } - /// 雙元圖陣列。 - public var bigrams: [Bigram] { node.bigrams } - /// 鍵。 - public var key: String { node.key } - - /// 初期化一個節錨。 - public init(node: Node = .init(), mass: Double? = nil) { - self.node = node - self.mass = mass ?? self.node.score - } - - /// 將該節錨雜湊化。 - public func hash(into hasher: inout Hasher) { - hasher.combine(node) - hasher.combine(mass) - } - - /// 將當前節锚列印成一個字串。 - public var description: String { - var stream = "" - stream += "{@(" + String(spanLength) + ")," - if node.key.isEmpty { - stream += node.description - } else { - stream += "null" - } - stream += "}" - return stream - } - } -} - -// MARK: - Array Extensions. - -extension Array where Element == Megrez.NodeAnchor { - /// 將節锚陣列列印成一個字串。 - public var description: String { - var arrOutputContent = [""] - for anchor in self { - arrOutputContent.append(anchor.description) - } - return arrOutputContent.joined(separator: "<-") - } - - /// 從一個節錨陣列當中取出目前的自動選字字串陣列。 - public var values: [String] { - map(\.node.currentPair.value) - } - - /// 從一個節錨陣列當中取出目前的索引鍵陣列。 - public var keys: [String] { - map(\.node.currentPair.key) - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/3_Span.swift b/Source/Modules/LanguageParsers/Megrez/3_Span.swift deleted file mode 100644 index 0e4c00eb..00000000 --- a/Source/Modules/LanguageParsers/Megrez/3_Span.swift +++ /dev/null @@ -1,63 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 幅位。 - @frozen public struct SpanUnit { - /// 辭典:以節點長度為索引,以節點為資料值。 - private var lengthNodeMap: [Int: Megrez.Node] = [:] - /// 最長幅距。 - private(set) var maxLength: Int = 0 - - /// 自我清空,各項參數歸零。 - mutating func clear() { - lengthNodeMap.removeAll() - maxLength = 0 - } - - /// 往自身插入一個節點、及給定的節點長度。 - /// - Parameters: - /// - node: 節點。 - /// - length: 給定的節點長度。 - mutating func insert(node: Node, length: Int) { - let length = abs(length) // 防呆 - lengthNodeMap[length] = node - maxLength = max(maxLength, length) - } - - /// 移除任何比給定的長度更長的節點。 - /// - Parameters: - /// - length: 給定的節點長度。 - mutating func dropNodesBeyond(length: Int) { - let length = abs(length) // 防呆 - if length > maxLength { return } - var lenMax = 0 - var removalList: [Int: Megrez.Node] = [:] - for key in lengthNodeMap.keys { - if key > length { - removalList[key] = lengthNodeMap[key] - } else { - lenMax = max(lenMax, key) - } - } - for key in removalList.keys { - lengthNodeMap.removeValue(forKey: key) - } - maxLength = lenMax - } - - /// 給定節點長度,獲取節點。 - /// - Parameters: - /// - length: 給定的節點長度。 - public func nodeOf(length: Int) -> Node? { - // 防呆 Abs() - lengthNodeMap.keys.contains(abs(length)) ? lengthNodeMap[abs(length)] : nil - } - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift deleted file mode 100644 index fe05ca8c..00000000 --- a/Source/Modules/LanguageParsers/Megrez/4_Node.swift +++ /dev/null @@ -1,172 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 節點。 - public class Node: Equatable, Hashable { - public static func == (lhs: Megrez.Node, rhs: Megrez.Node) -> Bool { - lhs.key == rhs.key && lhs.score == rhs.score && lhs.unigrams == rhs.unigrams && lhs.bigrams == rhs.bigrams - && lhs.candidates == rhs.candidates && lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap - && lhs.precedingBigramMap == rhs.precedingBigramMap && lhs.isCandidateFixed == rhs.isCandidateFixed - && lhs.selectedUnigramIndex == rhs.selectedUnigramIndex && lhs.spanLength == rhs.spanLength - } - - public func hash(into hasher: inout Hasher) { - hasher.combine(key) - hasher.combine(score) - hasher.combine(unigrams) - hasher.combine(bigrams) - hasher.combine(spanLength) - hasher.combine(candidates) - hasher.combine(valueUnigramIndexMap) - hasher.combine(precedingBigramMap) - hasher.combine(isCandidateFixed) - hasher.combine(selectedUnigramIndex) - } - - /// 鍵。 - private(set) var key: String = "" - /// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。 - private(set) var score: Double = 0 - /// 單元圖陣列。 - private(set) var unigrams: [Unigram] - /// 雙元圖陣列。 - private(set) var bigrams: [Bigram] - /// 指定的幅位長度。 - public var spanLength: Int = 0 - /// 候選字詞陣列,以鍵值陣列的形式存在。 - private(set) var candidates: [KeyValuePaired] = [] - /// 專門「用單元圖資料值來調查索引值」的辭典。 - private var valueUnigramIndexMap: [String: Int] = [:] - /// 專門「用給定鍵值來取對應的雙元圖陣列」的辭典。 - private var precedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:] - /// 狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態。 - private(set) var isCandidateFixed: Bool = false - /// 用來登記「當前選中的單元圖」的索引值的變數。 - private var selectedUnigramIndex: Int = 0 - /// 用來登記要施加給「『被標記為選中狀態』的候選字詞」的複寫權重的數值。 - public static let kSelectedCandidateScore: Double = 99 - /// 將當前節點列印成一個字串。 - public var description: String { - "(node,key:\(key),fixed:\(isCandidateFixed ? "true" : "false"),selected:\(selectedUnigramIndex),\(unigrams))" - } - - /// 公開:當前被選中的候選字詞的鍵值配對。 - public var currentPair: KeyValuePaired { - selectedUnigramIndex >= unigrams.count ? KeyValuePaired() : candidates[selectedUnigramIndex] - } - - /// 公開:給出當前單元圖陣列內最高的權重數值。 - public var highestUnigramScore: Double { unigrams.isEmpty ? 0.0 : unigrams[0].score } - - /// 初期化一個節點。 - /// - Parameters: - /// - key: 索引鍵。 - /// - unigrams: 單元圖陣列。 - /// - bigrams: 雙元圖陣列(非必填)。 - public init(key: String = "", spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) { - self.key = key - self.unigrams = unigrams - self.bigrams = bigrams - self.spanLength = spanLength - - self.unigrams.sort { - $0.score > $1.score - } - - if !self.unigrams.isEmpty { - score = unigrams[0].score - } - - for (i, gram) in self.unigrams.enumerated() { - valueUnigramIndexMap[gram.keyValue.value] = i - candidates.append(gram.keyValue) - } - - for gram in bigrams.lazy.filter({ [self] in - precedingBigramMap.keys.contains($0.precedingKeyValue) - }) { - precedingBigramMap[gram.precedingKeyValue]?.append(gram) - } - } - - /// 對擁有「給定的前述鍵值陣列」的節點提權。 - /// - Parameters: - /// - precedingKeyValues: 前述鍵值陣列。 - public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) { - var newIndex = selectedUnigramIndex - var max = score - - if !isCandidateFixed { - for neta in precedingKeyValues { - let bigrams = precedingBigramMap[neta] ?? [] - for bigram in bigrams.lazy.filter({ [self] in - $0.score > max && valueUnigramIndexMap.keys.contains($0.keyValue.value) - }) { - newIndex = valueUnigramIndexMap[bigram.keyValue.value] ?? newIndex - max = bigram.score - } - } - } - score = max - selectedUnigramIndex = newIndex - } - - /// 選中位於給定索引位置的候選字詞。 - /// - Parameters: - /// - index: 索引位置。 - /// - fix: 是否將當前解點標記為「候選詞已鎖定」的狀態。 - public func selectCandidateAt(index: Int = 0, fix: Bool = false) { - let index = abs(index) - selectedUnigramIndex = index >= unigrams.count ? 0 : index - isCandidateFixed = fix - score = Megrez.Node.kSelectedCandidateScore - } - - /// 重設該節點的候選字詞狀態。 - public func resetCandidate() { - selectedUnigramIndex = 0 - isCandidateFixed = false - if !unigrams.isEmpty { - score = unigrams[0].score - } - } - - /// 選中位於給定索引位置的候選字詞、且施加給定的權重。 - /// - Parameters: - /// - index: 索引位置。 - /// - score: 給定權重條件。 - public func selectFloatingCandidateAt(index: Int, score: Double) { - let index = abs(index) // 防呆 - selectedUnigramIndex = index >= unigrams.count ? 0 : index - isCandidateFixed = false - self.score = score - } - - /// 藉由給定的候選字詞字串,找出在庫的單元圖權重數值。沒有的話就找零。 - /// - Parameters: - /// - candidate: 給定的候選字詞字串。 - public func scoreFor(candidate: String) -> Double { - for unigram in unigrams.lazy.filter({ $0.keyValue.value == candidate }) { - return unigram.score - } - return 0.0 - } - - /// 藉由給定的候選字詞鍵值配對,找出在庫的單元圖權重數值。沒有的話就找零。 - /// - Parameters: - /// - candidate: 給定的候選字詞字串。 - public func scoreForPaired(candidate: KeyValuePaired) -> Double { - for unigram in unigrams.lazy.filter({ $0.keyValue == candidate }) { - return unigram.score - } - return 0.0 - } - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/4_Span.swift b/Source/Modules/LanguageParsers/Megrez/4_Span.swift new file mode 100644 index 00000000..9d7efb30 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/4_Span.swift @@ -0,0 +1,96 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +extension Megrez.Compositor { + /// 幅位乃指一組共享起點的節點。 + public class Span { + private var nodes: [Node?] = [] + private(set) var maxLength = 0 + private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } + public init() { + clear() + } + + public func clear() { + nodes.removeAll() + for _ in 0.. Bool { + guard (1...maxSpanLength).contains(node.spanLength) else { + return false + } + nodes[node.spanLength - 1] = node + maxLength = max(maxLength, node.spanLength) + return true + } + + /// 丟掉任何不小於給定幅位長度的節點。 + /// - Parameter length: 給定的幅位長度。 + /// - Returns: 該操作是否成功執行。 + @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { + guard (1...maxSpanLength).contains(length) else { + return false + } + for i in length...maxSpanLength { + nodes[i - 1] = nil + } + maxLength = 0 + guard length > 1 else { return false } + let maxR = length - 2 + for i in 0...maxR { + if nodes[maxR - i] != nil { + maxLength = maxR - i + 1 + break + } + } + return true + } + + public func nodeOf(length: Int) -> Node? { + guard (1...maxSpanLength).contains(length) else { return nil } + return nodes[length - 1] ?? nil + } + } + + // MARK: Internal implementations. + + /// 找出所有與該位置重疊的節點。其返回值為一個節錨陣列(包含節點、以及其起始位置)。 + /// - Parameter location: 游標位置。 + /// - Returns: 一個包含所有與該位置重疊的節點的陣列。 + func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] { + var results = [NodeAnchor]() + guard !spans.isEmpty, location < spans.count else { return results } + + // 先獲取該位置的所有單字節點。 + for theLocation in 1...spans[location].maxLength { + guard let node = spans[location].nodeOf(length: theLocation) else { continue } + results.append(.init(node: node, spanIndex: location)) + } + + // 再獲取以當前位置結尾或開頭的節點。 + let begin: Int = location - min(location, Megrez.Compositor.maxSpanLength - 1) + for theLocation in begin.. [Megrez.Unigram] - - /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 - func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] - - /// 給定鍵,確認是否有單元圖記錄在庫。 - func hasUnigramsFor(key: String) -> Bool -} - -extension Megrez { - /// 語言模型框架,回頭實際使用時需要派生一個型別、且重寫相關函式。 - open class LangModel: LangModelProtocol { - public init() {} - - // 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函式的參數設計。 - - /// 給定鍵,讓語言模型找給一組單元圖陣列。 - open func unigramsFor(key: String) -> [Megrez.Unigram] { - key.isEmpty ? [Megrez.Unigram]() : [Megrez.Unigram]() - } - - /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 - open func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - - /// 給定鍵,確認是否有單元圖記錄在庫。 - open func hasUnigramsFor(key: String) -> Bool { - key.count != 0 - } - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/5_Vertex.swift b/Source/Modules/LanguageParsers/Megrez/5_Vertex.swift new file mode 100644 index 00000000..ea4c44df --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/5_Vertex.swift @@ -0,0 +1,96 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +extension Megrez.Compositor { + /// 一個「有向無環圖的」的頂點單位。 + /// + /// 這是一個可變的數據結構,用於有向無環圖的構建和單源最短路徑的計算。 + class Vertex { + /// 前述頂點。 + public var prev: Vertex? + /// 自身屬下的頂點陣列。 + public var edges = [Vertex]() + /// 該變數用於最短路徑的計算。 + /// + /// 我們實際上是在計算具有最大權重的路徑,因此距離的初始值是負無窮的。 + /// 如果我們要計算最短的權重/距離,我們會將其初期值設為正無窮。 + public var distance = -(Double.infinity) + /// 在進行進行位相幾何排序時會用到的狀態標記。 + public var topologicallySorted = false + public var node: Node + public init(node: Node) { + self.node = node + } + } + + /// 卸勁函式。 + /// + /// 「卸勁 (relax)」一詞出自 Cormen 在 2001 年的著作「Introduction to Algorithms」的 585 頁。 + /// - Parameters: + /// - u: 參照頂點,會在必要時成為 v 的前述頂點。 + /// - v: 要影響的頂點。 + func relax(u: Vertex, v: inout Vertex) { + /// 從 u 到 w 的距離,也就是 v 的權重。 + let w: Double = v.node.score + /// 這裡計算最大權重: + /// 如果 v 目前的距離值小於「u 的距離值+w(w 是 u 到 w 的距離,也就是 v 的權重)」, + /// 我們就更新 v 的距離及其前述頂點。 + if v.distance < u.distance + w { + v.distance = u.distance + w + v.prev = u + } + } + + typealias VertexSpan = [Vertex] + + /// 對持有單個根頂點的有向無環圖進行位相幾何排序(topological + /// sort)、且將排序結果以頂點陣列的形式給出。 + /// + /// 這裡使用我們自己的堆棧和狀態定義實現了一個非遞迴版本, + /// 這樣我們就不會受到當前線程的堆棧大小的限制。以下是等價的原始算法。 + /// ``` + /// func topologicalSort(vertex: Vertex) { + /// for vertexNode in vertex.edges { + /// if !vertexNode.topologicallySorted { + /// dfs(vertexNode, result) + /// vertexNode.topologicallySorted = true + /// } + /// result.append(vertexNode) + /// } + /// } + /// ``` + /// 至於遞迴版本則類似於 Cormen 在 2001 年的著作「Introduction to Algorithms」當中的樣子。 + /// - Parameter root: 根頂點。 + /// - Returns: 排序結果(頂點陣列)。 + func topologicalSort(root: Vertex) -> [Vertex] { + class State { + var iterIndex: Int + var vertex: Vertex + init(vertex: Vertex, iterIndex: Int = 0) { + self.vertex = vertex + self.iterIndex = iterIndex + } + } + var result = [Vertex]() + var stack = [State]() + stack.append(.init(vertex: root)) + while !stack.isEmpty { + let state = stack[stack.count - 1] + let theVertex = state.vertex + if state.iterIndex < state.vertex.edges.count { + let newVertex = state.vertex.edges[state.iterIndex] + state.iterIndex += 1 + if !newVertex.topologicallySorted { + stack.append(.init(vertex: newVertex)) + continue + } + } + theVertex.topologicallySorted = true + result.append(theVertex) + stack.removeLast() + } + return result + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift deleted file mode 100644 index d355a016..00000000 --- a/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift +++ /dev/null @@ -1,64 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 雙元圖。 - @frozen public struct Bigram: Equatable, CustomStringConvertible, Hashable { - /// 當前鍵值。 - public var keyValue: KeyValuePaired - /// 前述鍵值。 - public var precedingKeyValue: KeyValuePaired - /// 權重。 - public var score: Double - /// 將當前雙元圖列印成一個字串。 - public var description: String { - "(" + keyValue.description + "|" + precedingKeyValue.description + "," + String(score) + ")" - } - - /// 初期化一筆「雙元圖」。一筆雙元圖由一組前述鍵值配對、一組當前鍵值配對、與一筆權重數值組成。 - /// - Parameters: - /// - precedingKeyValue: 前述鍵值。 - /// - keyValue: 當前鍵值。 - /// - score: 權重(雙精度小數)。 - public init(precedingKeyValue: KeyValuePaired, keyValue: KeyValuePaired, score: Double) { - self.keyValue = keyValue - self.precedingKeyValue = precedingKeyValue - self.score = score - } - - public func hash(into hasher: inout Hasher) { - hasher.combine(keyValue) - hasher.combine(precedingKeyValue) - hasher.combine(score) - // hasher.combine(paired) - } - - public static func == (lhs: Bigram, rhs: Bigram) -> Bool { - lhs.precedingKeyValue == rhs.precedingKeyValue && lhs.keyValue == rhs.keyValue && lhs.score == rhs.score - } - - public static func < (lhs: Bigram, rhs: Bigram) -> Bool { - lhs.precedingKeyValue < rhs.precedingKeyValue - || (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score)) - } - } -} - -// MARK: - DumpDOT-related functions. - -extension Array where Element == Megrez.Bigram { - /// 將雙元圖陣列列印成一個字串。 - public var description: String { - var arrOutputContent = [""] - for (index, gram) in enumerated() { - arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.description]) - } - return "[" + String(count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}" - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/6_Node.swift b/Source/Modules/LanguageParsers/Megrez/6_Node.swift new file mode 100644 index 00000000..a42f1788 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/6_Node.swift @@ -0,0 +1,142 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +extension Megrez.Compositor { + /// 一個節點由這些內容組成:幅位長度、索引鍵、以及一組單元圖。幅位長度就是指這個 + /// 節點在組字器內橫跨了多少個字長。組字器負責構築自身的節點。對於由多個漢字組成 + /// 的詞,組字器會將多個讀音索引鍵合併為一個讀音索引鍵、據此向語言模組請求對應的 + /// 單元圖結果陣列。舉例說,如果一個詞有兩個漢字組成的話,那麼讀音也是有兩個、其 + /// 索引鍵值也是由兩個讀音組成的,那麼這個節點的幅位長度就是 2。 + public class Node: Equatable, Hashable { + /// 三種不同的針對一個節點的覆寫行為。 + /// - withNoOverrides: 無覆寫行為。 + /// - withTopUnigramScore: 使用指定的單元圖資料值來覆寫該節點,但卻使用 + /// 當前狀態下權重最高的單元圖的權重數值。打比方說,如果該節點內的單元圖陣列是 + /// [("a", -114), ("b", -514), ("c", -1919)] 的話,指定該覆寫行為則會導致該節 + /// 點返回的結果為 ("c", -114)。該覆寫行為多用於諸如使用者半衰記憶模組的建議 + /// 行為。被覆寫的這個節點的狀態可能不會再被爬軌行為擅自改回。該覆寫行為無法 + /// 防止其它節點被爬軌函數所支配。這種情況下就需要用到 kOverridingScore + /// - withHighScore: 將該節點權重覆寫為 kOverridingScore,使其被爬軌函數所青睞。 + public enum OverrideType: Int { + case withNoOverrides = 0 + case withTopUnigramScore = 1 + case withHighScore = 2 + } + + /// 一個用以覆寫權重的數值。該數值之高足以改變爬軌函數對該節點的讀取結果。這裡用 + /// 「0」可能看似足夠了,但仍會使得該節點的覆寫狀態有被爬軌函數忽視的可能。比方說 + /// 要針對索引鍵「a b c」複寫的資料值為「A B C」,使用大寫資料值來覆寫節點。這時, + /// 如果這個獨立的 c 有一個可以拮抗權重的詞「bc」的話,可能就會導致爬軌函數的算法 + /// 找出「A->bc」的爬軌途徑(尤其是當 A 和 B 使用「0」作為複寫數值的情況下)。這樣 + /// 一來,「A-B」就不一定始終會是爬軌函數的青睞結果了。所以,這裡一定要用大於 0 的 + /// 數(比如野獸常數),以讓「c」更容易單獨被選中。 + public static let kOverridingScore: Double = 114_514 + + private(set) var key: String + private(set) var keyArray: [String] + private(set) var spanLength: Int + private(set) var unigrams: [Megrez.Unigram] + private(set) var currentUnigramIndex: Int = 0 { + didSet { currentUnigramIndex = min(max(0, currentUnigramIndex), unigrams.count - 1) } + } + + public var currentPair: Megrez.Compositor.Candidate { .init(key: key, value: value) } + + public func hash(into hasher: inout Hasher) { + hasher.combine(key) + hasher.combine(spanLength) + hasher.combine(unigrams) + hasher.combine(currentUnigramIndex) + hasher.combine(spanLength) + hasher.combine(overrideType) + } + + private(set) var overrideType: Node.OverrideType + + public static func == (lhs: Node, rhs: Node) -> Bool { + lhs.key == rhs.key && lhs.spanLength == rhs.spanLength + && lhs.unigrams == rhs.unigrams && lhs.overrideType == rhs.overrideType + } + + public init( + keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], keySeparator: String = "" + ) { + key = keyArray.joined(separator: keySeparator) + self.keyArray = keyArray + self.spanLength = spanLength + self.unigrams = unigrams + overrideType = .withNoOverrides + } + + /// 給出目前的最高權重單元圖。該結果可能會受節點覆寫狀態所影響。 + /// - Returns: 目前的最高權重單元圖。該結果可能會受節點覆寫狀態所影響。 + public var currentUnigram: Megrez.Unigram { + unigrams.isEmpty ? .init() : unigrams[currentUnigramIndex] + } + + public var value: String { currentUnigram.value } + + public var score: Double { + guard !unigrams.isEmpty else { return 0 } + switch overrideType { + case .withHighScore: return Megrez.Compositor.Node.kOverridingScore + case .withTopUnigramScore: return unigrams[0].score + default: return currentUnigram.score + } + } + + public var isOverriden: Bool { + overrideType != .withNoOverrides + } + + public func reset() { + currentUnigramIndex = 0 + overrideType = .withNoOverrides + } + + public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool { + guard type != .withNoOverrides else { + return false + } + for (i, gram) in unigrams.enumerated() { + if value != gram.value { continue } + currentUnigramIndex = i + overrideType = type + return true + } + return false + } + } +} + +extension Megrez.Compositor { + /// 節錨。 + /// + /// 在 Gramambular 當中又被稱為「NodeInSpan」。 + public struct NodeAnchor: Hashable { + let node: Megrez.Compositor.Node + let spanIndex: Int // 幅位座標 + var spanLength: Int { node.spanLength } + var unigrams: [Megrez.Unigram] { node.unigrams } + var key: String { node.key } + var value: String { node.value } + + /// 將該節錨雜湊化。 + public func hash(into hasher: inout Hasher) { + hasher.combine(node) + hasher.combine(spanIndex) + } + } +} + +// MARK: - Array Extensions. + +extension Array where Element == Megrez.Compositor.Node { + /// 從一個節點陣列當中取出目前的自動選字字串陣列。 + public var values: [String] { map(\.value) } + + /// 從一個節點陣列當中取出目前的索引鍵陣列。 + public var keys: [String] { map(\.key) } +} diff --git a/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift deleted file mode 100644 index d6e78ac8..00000000 --- a/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift +++ /dev/null @@ -1,57 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 單元圖。 - @frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable { - /// 鍵值。 - public var keyValue: KeyValuePaired - /// 權重。 - public var score: Double - /// 將當前單元圖列印成一個字串。 - public var description: String { - "(" + keyValue.description + "," + String(score) + ")" - } - - /// 初期化一筆「單元圖」。一筆單元圖由一組鍵值配對與一筆權重數值組成。 - /// - Parameters: - /// - keyValue: 鍵值。 - /// - score: 權重(雙精度小數)。 - public init(keyValue: KeyValuePaired, score: Double) { - self.keyValue = keyValue - self.score = score - } - - public func hash(into hasher: inout Hasher) { - hasher.combine(keyValue) - hasher.combine(score) - } - - public static func == (lhs: Unigram, rhs: Unigram) -> Bool { - lhs.keyValue == rhs.keyValue && lhs.score == rhs.score - } - - public static func < (lhs: Unigram, rhs: Unigram) -> Bool { - lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score) - } - } -} - -// MARK: - DumpDOT-related functions. - -extension Array where Element == Megrez.Unigram { - /// 將單元圖陣列列印成一個字串。 - public var description: String { - var arrOutputContent = [""] - for (index, gram) in enumerated() { - arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.description]) - } - return "[" + String(count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}" - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift b/Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift deleted file mode 100644 index 5678e615..00000000 --- a/Source/Modules/LanguageParsers/Megrez/7_KeyValuePaired.swift +++ /dev/null @@ -1,58 +0,0 @@ -// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). -// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). -// ==================== -// This code is released under the MIT license (SPDX-License-Identifier: MIT) -// ... with NTL restriction stating that: -// No trademark license is granted to use the trade names, trademarks, service -// marks, or product names of Contributor, except as required to fulfill notice -// requirements defined in MIT License. - -extension Megrez { - /// 鍵值配對。 - @frozen public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible { - /// 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 - public var key: String - /// 資料值。 - public var value: String - /// 將當前鍵值列印成一個字串。 - public var description: String { "(" + key + "," + value + ")" } - /// 判斷當前鍵值配對是否合規。如果鍵與值有任一為空,則結果為 false。 - public var isValid: Bool { !key.isEmpty && !value.isEmpty } - /// 將當前鍵值列印成一個字串,但如果該鍵值配對為空的話則僅列印「()」。 - public var toNGramKey: String { !isValid ? "()" : "(" + key + "," + value + ")" } - - /// 初期化一組鍵值配對。 - /// - Parameters: - /// - key: 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 - /// - value: 資料值。 - public init(key: String = "", value: String = "") { - self.key = key - self.value = value - } - - public func hash(into hasher: inout Hasher) { - hasher.combine(key) - hasher.combine(value) - } - - public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { - lhs.key == rhs.key && lhs.value == rhs.value - } - - public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { - (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value) - } - - public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { - (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value) - } - - public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { - (lhs.key.count <= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value) - } - - public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { - (lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value) - } - } -} diff --git a/Source/Modules/LanguageParsers/Megrez/7_LangModel.swift b/Source/Modules/LanguageParsers/Megrez/7_LangModel.swift new file mode 100644 index 00000000..b08a6ed0 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/7_LangModel.swift @@ -0,0 +1,61 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +/// 語言模組協定。 +public protocol LangModelProtocol { + /// 給定鍵,讓語言模型找給一組單元圖陣列。 + func unigramsFor(key: String) -> [Megrez.Unigram] + /// 給定鍵,確認是否有單元圖記錄在庫。 + func hasUnigramsFor(key: String) -> Bool +} + +extension Megrez.Compositor { + /// 一個套殼語言模型,用來始終返回經過排序的單元圖。 + public class LangModelRanked: LangModelProtocol { + private let langModel: LangModelProtocol + /// 一個套殼語言模型,用來始終返回經過排序的單元圖。 + /// - Parameter withLM: 用來對接的語言模型。 + public init(withLM: LangModelProtocol) { + langModel = withLM + } + + /// 給定索引鍵,讓語言模型找給一組經過穩定排序的單元圖陣列。 + /// - Parameter key: 給定的索引鍵字串。 + /// - Returns: 對應的經過穩定排序的單元圖陣列。 + public func unigramsFor(key: String) -> [Megrez.Unigram] { + langModel.unigramsFor(key: key).stableSorted { $0.score > $1.score } + } + + /// 根據給定的索引鍵來確認各個資料庫陣列內是否存在對應的資料。 + /// - Parameter key: 索引鍵。 + /// - Returns: 是否在庫。 + public func hasUnigramsFor(key: String) -> Bool { + langModel.hasUnigramsFor(key: key) + } + } +} + +// MARK: - Stable Sort Extension + +// Reference: https://stackoverflow.com/a/50545761/4162914 + +extension Sequence { + /// Return a stable-sorted collection. + /// + /// - Parameter areInIncreasingOrder: Return nil when two element are equal. + /// - Returns: The sorted collection. + fileprivate func stableSorted( + by areInIncreasingOrder: (Element, Element) throws -> Bool + ) + rethrows -> [Element] + { + try enumerated() + .sorted { a, b -> Bool in + try areInIncreasingOrder(a.element, b.element) + || (a.offset < b.offset && !areInIncreasingOrder(b.element, a.element)) + } + .map(\.element) + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/8_Unigram.swift b/Source/Modules/LanguageParsers/Megrez/8_Unigram.swift new file mode 100644 index 00000000..b8aa9cb2 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/8_Unigram.swift @@ -0,0 +1,40 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) + +extension Megrez { + /// 單元圖。 + @frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable { + /// 鍵值。 + public var value: String + /// 權重。 + public var score: Double + /// 將當前單元圖列印成一個字串。 + public var description: String { + "(" + value.description + "," + String(score) + ")" + } + + /// 初期化一筆「單元圖」。一筆單元圖由一組鍵值配對與一筆權重數值組成。 + /// - Parameters: + /// - value: 鍵值。 + /// - score: 權重(雙精度小數)。 + public init(value: String = "", score: Double = 0) { + self.value = value + self.score = score + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(value) + hasher.combine(score) + } + + public static func == (lhs: Unigram, rhs: Unigram) -> Bool { + lhs.value == rhs.value && lhs.score == rhs.score + } + + public static func < (lhs: Unigram, rhs: Unigram) -> Bool { + lhs.value < rhs.value || (lhs.value == rhs.value && lhs.score < rhs.score) + } + } +} diff --git a/Source/Resources/Base.lproj/Localizable.strings b/Source/Resources/Base.lproj/Localizable.strings index 9b8d0199..69440ed2 100644 --- a/Source/Resources/Base.lproj/Localizable.strings +++ b/Source/Resources/Base.lproj/Localizable.strings @@ -29,7 +29,7 @@ "\"%@\" length must ≥ 2 for a user phrase." = "\"%@\" length must ≥ 2 for a user phrase."; "\"%@\" length should ≤ %d for a user phrase." = "\"%@\" length should ≤ %d for a user phrase."; "\"%@\" selected. ENTER to add user phrase." = "\"%@\" selected. ENTER to add user phrase."; -"\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude." = "\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude."; +"\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude." = "\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude."; "Edit Phrase Replacement Table…" = "Edit Phrase Replacement Table…"; "Use Phrase Replacement" = "Use Phrase Replacement"; "Candidates keys cannot be empty." = "Candidates keys cannot be empty."; @@ -113,6 +113,7 @@ "Choose or hit Enter to confim your prefered keys for selecting candidates." = "Choose or hit Enter to confim your prefered keys for selecting candidates."; "Choose the behavior of (Shift+)Space key with candidates." = "Choose the behavior of (Shift+)Space key with candidates."; "Choose the behavior of (Shift+)Tab key in the candidate window." = "Choose the behavior of (Shift+)Tab key in the candidate window."; +"Choose the behavior of Shift+Letter key with letter inputs." = "Choose the behavior of Shift+Letter key with letter inputs."; "Choose the cursor position where you want to list possible candidates." = "Choose the cursor position where you want to list possible candidates."; "Choose the macOS-level basic keyboard layout." = "Choose the macOS-level basic keyboard layout."; "Choose the phonetic layout for Mandarin parser." = "Choose the phonetic layout for Mandarin parser."; @@ -123,6 +124,8 @@ "Dachen 26 (libChewing)" = "Dachen 26 (libChewing)"; "Debug Mode" = "Debug Mode"; "Dictionary" = "Dictionary"; +"Directly commit lowercased letters" = "Directly commit lowercased letters"; +"Directly commit uppercased letters" = "Directly commit uppercased letters"; "Emulating select-candidate-per-character mode" = "Emulating select-candidate-per-character mode"; "Enable CNS11643 Support (2022-07-20)" = "Enable CNS11643 Support (2022-07-20)"; "Enable Space key for calling candidate window" = "Enable Space key for calling candidate window"; @@ -143,8 +146,8 @@ "IBM" = "IBM"; "in front of the phrase (like macOS built-in Zhuyin IME)" = "in front of the phrase (like macOS built-in Zhuyin IME)"; "Japanese" = "Japanese"; -"Keyboard" = "Keyboard"; "Keyboard Shortcuts:" = "Keyboard Shortcuts:"; +"Keyboard" = "Keyboard"; "Misc Settings:" = "Misc Settings:"; "MiTAC" = "MiTAC"; "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only." = "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only."; @@ -155,6 +158,7 @@ "Secondary Pinyin with Numeral Intonation" = "Secondary Pinyin with Numeral Intonation"; "Seigyou" = "Seigyou (JinYei)"; "Selection Keys:" = "Selection Keys:"; +"Shift+Letter:" = "Shift+Letter:"; "Show Hanyu-Pinyin in the inline composition buffer & tooltip" = "Show Hanyu-Pinyin in the inline composition buffer & tooltip"; "Show page buttons in candidate window" = "Show page buttons in candidate window"; "Simplified Chinese" = "Simplified Chinese"; @@ -163,6 +167,7 @@ "Starlight" = "Starlight"; "Stop farting (when typed phonetic combination is invalid, etc.)" = "Stop farting (when typed phonetic combination is invalid, etc.)"; "Traditional Chinese" = "Traditional Chinese"; +"Type them into inline composition buffer" = "Type them into inline composition buffer"; "Typing Style:" = "Typing Style:"; "UI Language:" = "UI Language:"; "Universal Pinyin with Numeral Intonation" = "Universal Pinyin with Numeral Intonation"; diff --git a/Source/Resources/en.lproj/Localizable.strings b/Source/Resources/en.lproj/Localizable.strings index 9b8d0199..69440ed2 100644 --- a/Source/Resources/en.lproj/Localizable.strings +++ b/Source/Resources/en.lproj/Localizable.strings @@ -29,7 +29,7 @@ "\"%@\" length must ≥ 2 for a user phrase." = "\"%@\" length must ≥ 2 for a user phrase."; "\"%@\" length should ≤ %d for a user phrase." = "\"%@\" length should ≤ %d for a user phrase."; "\"%@\" selected. ENTER to add user phrase." = "\"%@\" selected. ENTER to add user phrase."; -"\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude." = "\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude."; +"\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude." = "\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude."; "Edit Phrase Replacement Table…" = "Edit Phrase Replacement Table…"; "Use Phrase Replacement" = "Use Phrase Replacement"; "Candidates keys cannot be empty." = "Candidates keys cannot be empty."; @@ -113,6 +113,7 @@ "Choose or hit Enter to confim your prefered keys for selecting candidates." = "Choose or hit Enter to confim your prefered keys for selecting candidates."; "Choose the behavior of (Shift+)Space key with candidates." = "Choose the behavior of (Shift+)Space key with candidates."; "Choose the behavior of (Shift+)Tab key in the candidate window." = "Choose the behavior of (Shift+)Tab key in the candidate window."; +"Choose the behavior of Shift+Letter key with letter inputs." = "Choose the behavior of Shift+Letter key with letter inputs."; "Choose the cursor position where you want to list possible candidates." = "Choose the cursor position where you want to list possible candidates."; "Choose the macOS-level basic keyboard layout." = "Choose the macOS-level basic keyboard layout."; "Choose the phonetic layout for Mandarin parser." = "Choose the phonetic layout for Mandarin parser."; @@ -123,6 +124,8 @@ "Dachen 26 (libChewing)" = "Dachen 26 (libChewing)"; "Debug Mode" = "Debug Mode"; "Dictionary" = "Dictionary"; +"Directly commit lowercased letters" = "Directly commit lowercased letters"; +"Directly commit uppercased letters" = "Directly commit uppercased letters"; "Emulating select-candidate-per-character mode" = "Emulating select-candidate-per-character mode"; "Enable CNS11643 Support (2022-07-20)" = "Enable CNS11643 Support (2022-07-20)"; "Enable Space key for calling candidate window" = "Enable Space key for calling candidate window"; @@ -143,8 +146,8 @@ "IBM" = "IBM"; "in front of the phrase (like macOS built-in Zhuyin IME)" = "in front of the phrase (like macOS built-in Zhuyin IME)"; "Japanese" = "Japanese"; -"Keyboard" = "Keyboard"; "Keyboard Shortcuts:" = "Keyboard Shortcuts:"; +"Keyboard" = "Keyboard"; "Misc Settings:" = "Misc Settings:"; "MiTAC" = "MiTAC"; "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only." = "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only."; @@ -155,6 +158,7 @@ "Secondary Pinyin with Numeral Intonation" = "Secondary Pinyin with Numeral Intonation"; "Seigyou" = "Seigyou (JinYei)"; "Selection Keys:" = "Selection Keys:"; +"Shift+Letter:" = "Shift+Letter:"; "Show Hanyu-Pinyin in the inline composition buffer & tooltip" = "Show Hanyu-Pinyin in the inline composition buffer & tooltip"; "Show page buttons in candidate window" = "Show page buttons in candidate window"; "Simplified Chinese" = "Simplified Chinese"; @@ -163,6 +167,7 @@ "Starlight" = "Starlight"; "Stop farting (when typed phonetic combination is invalid, etc.)" = "Stop farting (when typed phonetic combination is invalid, etc.)"; "Traditional Chinese" = "Traditional Chinese"; +"Type them into inline composition buffer" = "Type them into inline composition buffer"; "Typing Style:" = "Typing Style:"; "UI Language:" = "UI Language:"; "Universal Pinyin with Numeral Intonation" = "Universal Pinyin with Numeral Intonation"; diff --git a/Source/Resources/ja.lproj/Localizable.strings b/Source/Resources/ja.lproj/Localizable.strings index 146eefca..65626401 100644 --- a/Source/Resources/ja.lproj/Localizable.strings +++ b/Source/Resources/ja.lproj/Localizable.strings @@ -29,7 +29,7 @@ "\"%@\" length must ≥ 2 for a user phrase." = "「%@」もう1つ文字のお選びを。"; "\"%@\" length should ≤ %d for a user phrase." = "「%@」文字数過剰で登録不可、%d 文字以内にして下さい。"; "\"%@\" selected. ENTER to add user phrase." = "「%@」を ENTER で辞書に登録。"; -"\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude." = "「%@」は既存語彙:ENTER で最優先にし;\n SHIFT+CMD+ENTER で排除。"; +"\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude." = "「%@」は既存語彙:ENTER で最優先にし、SHIFT+CMD+ENTER で優先順位を下げる;\n BackSpace 或いは Delete で排除。"; "Edit Phrase Replacement Table…" = "言葉置換表を編集…"; "Use Phrase Replacement" = "言葉置換機能"; "Candidates keys cannot be empty." = "言選り用キー陣列に何かキーをご登録ください。"; @@ -113,6 +113,7 @@ "Choose or hit Enter to confim your prefered keys for selecting candidates." = "お好きなる言選り用キー陣列をご指定ください。新しい組み合わせは Enter で効かす。"; "Choose the behavior of (Shift+)Space key with candidates." = "入力候補についての (Shift+)Space キーの輪番切替対象をご指定ください。"; "Choose the behavior of (Shift+)Tab key in the candidate window." = "入力候補陳列での (Shift+)Tab キーの輪番切替対象をご指定ください。"; +"Choose the behavior of Shift+Letter key with letter inputs." = "Shift+文字キーの行為をご指定ください。"; "Choose the cursor position where you want to list possible candidates." = "カーソルはどこで入力候補を呼び出すかとご指定ださい。"; "Choose the macOS-level basic keyboard layout." = "macOS 基礎キーボード配置をご指定ください。"; "Choose the phonetic layout for Mandarin parser." = "共通語分析器の注音配列をご指定ください。"; @@ -123,6 +124,8 @@ "Dachen 26 (libChewing)" = "酷音大千 26 キー配列"; "Debug Mode" = "欠陥辿着モード"; "Dictionary" = "辞書設定"; +"Directly commit lowercased letters" = "ローマ字(小文字)を直接出力"; +"Directly commit uppercased letters" = "ローマ字(大文字)を直接出力"; "Emulating select-candidate-per-character mode" = "漢字1つづつ全候補選択入力モード"; "Enable CNS11643 Support (2022-07-20)" = "全字庫モード // 入力可能な漢字数を倍増す (2022-07-20)"; "Enable Space key for calling candidate window" = "Space キーで入力候補を呼び出す"; @@ -143,8 +146,8 @@ "IBM" = "IBM 配列"; "in front of the phrase (like macOS built-in Zhuyin IME)" = "単語の前で // macOS 内蔵注音入力のやり方"; "Japanese" = "和語"; -"Keyboard" = "配列設定"; "Keyboard Shortcuts:" = "ショートカット:"; +"Keyboard" = "配列設定"; "Misc Settings:" = "他の設定:"; "MiTAC" = "神通配列"; "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only." = "QWERTY 以外の英数キーボードは漢語弁音以外の配列に不適用。"; @@ -155,6 +158,7 @@ "Secondary Pinyin with Numeral Intonation" = "国音二式 (ローマ字+数字音調)"; "Seigyou" = "精業配列"; "Selection Keys:" = "言選り用キー:"; +"Shift+Letter:" = "Shift+文字キー:"; "Show Hanyu-Pinyin in the inline composition buffer & tooltip" = "弁音合併入力(入力緩衝列とヒントで音読みを漢語弁音に)"; "Show page buttons in candidate window" = "入力候補陳列の側にページボタンを表示"; "Simplified Chinese" = "簡体中国語"; @@ -163,6 +167,7 @@ "Starlight" = "星光配列"; "Stop farting (when typed phonetic combination is invalid, etc.)" = "マナーモード // 外すと入力間違った時に変な声が出る"; "Traditional Chinese" = "繁体中国語"; +"Type them into inline composition buffer" = "入力緩衝列にローマ字入力"; "Typing Style:" = "入力習慣:"; "UI Language:" = "表示用言語:"; "Universal Pinyin with Numeral Intonation" = "汎用弁音 (ローマ字+数字音調)"; diff --git a/Source/Resources/zh-Hans.lproj/Localizable.strings b/Source/Resources/zh-Hans.lproj/Localizable.strings index 3cc45095..14b6b4c8 100644 --- a/Source/Resources/zh-Hans.lproj/Localizable.strings +++ b/Source/Resources/zh-Hans.lproj/Localizable.strings @@ -29,7 +29,7 @@ "\"%@\" length must ≥ 2 for a user phrase." = "「%@」字数不足以自订语汇。"; "\"%@\" length should ≤ %d for a user phrase." = "「%@」字数超过 %d、无法自订。"; "\"%@\" selected. ENTER to add user phrase." = "「%@」敲 Enter 添入自订语汇。"; -"\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude." = "「%@」已存在:敲 Enter 以升权;\n 敲 Shift+CMD+Enter 以排除。"; +"\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude." = "「%@」已存在:敲 Enter 以升权、敲 Shift+CMD+Enter 以降权;\n 敲 BackSpace 或 Delete 以排除。"; "Edit Phrase Replacement Table…" = "编辑语汇置换表…"; "Use Phrase Replacement" = "使用语汇置换"; "Candidates keys cannot be empty." = "您必须指定选字键。"; @@ -113,6 +113,7 @@ "Choose or hit Enter to confim your prefered keys for selecting candidates." = "请选择您所偏好的用来选字的按键组合。自订组合需敲 Enter 键生效。"; "Choose the behavior of (Shift+)Space key with candidates." = "指定 (Shift+)空格键 对候选字词而言的轮替操作对象。"; "Choose the behavior of (Shift+)Tab key in the candidate window." = "指定 (Shift+)Tab 在选字窗内的轮替操作对象。"; +"Choose the behavior of Shift+Letter key with letter inputs." = "指定 Shift+字母键 的行为。"; "Choose the cursor position where you want to list possible candidates." = "请选择用以触发选字的游标相对位置。"; "Choose the macOS-level basic keyboard layout." = "请选择 macOS 基础键盘布局。"; "Choose the phonetic layout for Mandarin parser." = "请指定普通话/国音分析器所使用的注音排列。"; @@ -123,6 +124,8 @@ "Dachen 26 (libChewing)" = "酷音大千二十六键排列"; "Debug Mode" = "侦错模式"; "Dictionary" = "辞典"; +"Directly commit lowercased letters" = "直接递交小写字母"; +"Directly commit uppercased letters" = "直接递交大写字母"; "Emulating select-candidate-per-character mode" = "模拟 90 年代前期注音逐字选字输入风格"; "Enable CNS11643 Support (2022-07-20)" = "启用 CNS11643 全字库支援 (2022-07-20)"; "Enable Space key for calling candidate window" = "敲空格键以呼出候选字窗"; @@ -144,8 +147,8 @@ "IBM" = "IBM 排列"; "in front of the phrase (like macOS built-in Zhuyin IME)" = "将游标置于词语前方 // macOS 内建注音风格"; "Japanese" = "和语"; -"Keyboard" = "键盘"; "Keyboard Shortcuts:" = "键盘快捷键:"; +"Keyboard" = "键盘"; "Misc Settings:" = "杂项:"; "MiTAC" = "神通排列"; "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only." = "QWERTY 以外的英数布局是为了汉语拼音排列使用者而准备的。"; @@ -156,6 +159,7 @@ "Secondary Pinyin with Numeral Intonation" = "国音二式+数字标调"; "Seigyou" = "精业排列"; "Selection Keys:" = "选字键:"; +"Shift+Letter:" = "Shift+字母键:"; "Show Hanyu-Pinyin in the inline composition buffer & tooltip" = "拼音并击(组字区与工具提示内显示汉语拼音)"; "Show page buttons in candidate window" = "在选字窗内显示翻页按钮"; "Simplified Chinese" = "简体中文"; @@ -164,6 +168,7 @@ "Starlight" = "星光排列"; "Stop farting (when typed phonetic combination is invalid, etc.)" = "廉耻模式 // 取消勾选的话,敲错字时会有异音"; "Traditional Chinese" = "繁体中文"; +"Type them into inline composition buffer" = "直接键入内文组字区"; "Typing Style:" = "输入风格:"; "UI Language:" = "介面语言:"; "Universal Pinyin with Numeral Intonation" = "通用拼音+数字标调"; diff --git a/Source/Resources/zh-Hant.lproj/Localizable.strings b/Source/Resources/zh-Hant.lproj/Localizable.strings index 92c828cc..17cc4a4f 100644 --- a/Source/Resources/zh-Hant.lproj/Localizable.strings +++ b/Source/Resources/zh-Hant.lproj/Localizable.strings @@ -29,7 +29,7 @@ "\"%@\" length must ≥ 2 for a user phrase." = "「%@」字數不足以自訂語彙。"; "\"%@\" length should ≤ %d for a user phrase." = "「%@」字數超過 %d、無法自訂。"; "\"%@\" selected. ENTER to add user phrase." = "「%@」敲 Enter 添入自訂語彙。"; -"\"%@\" already exists: ENTER to boost, \n SHIFT+CMD+ENTER to exclude." = "「%@」已存在:敲 Enter 以升權;\n 敲 Shift+CMD+Enter 以排除。"; +"\"%@\" already exists: ENTER to boost, SHIFT+CMD+ENTER to nerf, \n BackSpace or Delete key to exclude." = "「%@」已存在:敲 Enter 以升權、敲 Shift+CMD+Enter 以降權;\n 敲 BackSpace 或 Delete 以排除。"; "Edit Phrase Replacement Table…" = "編輯語彙置換表…"; "Use Phrase Replacement" = "使用語彙置換"; "Candidates keys cannot be empty." = "您必須指定選字鍵。"; @@ -113,6 +113,7 @@ "Choose or hit Enter to confim your prefered keys for selecting candidates." = "請選擇您所偏好的用來選字的按鍵組合。自訂組合需敲 Enter 鍵生效。"; "Choose the behavior of (Shift+)Space key with candidates." = "指定 (Shift+)空格鍵 對候選字詞而言的輪替操作對象。"; "Choose the behavior of (Shift+)Tab key in the candidate window." = "指定 (Shift+)Tab 在選字窗內的輪替操作對象。"; +"Choose the behavior of Shift+Letter key with letter inputs." = "指定 Shift+字母鍵 的行為。"; "Choose the cursor position where you want to list possible candidates." = "請選擇用以觸發選字的游標相對位置。"; "Choose the macOS-level basic keyboard layout." = "請選擇 macOS 基礎鍵盤佈局。"; "Choose the phonetic layout for Mandarin parser." = "請指定普通話/國音分析器所使用的注音排列。"; @@ -123,6 +124,8 @@ "Dachen 26 (libChewing)" = "酷音大千二十六鍵排列"; "Debug Mode" = "偵錯模式"; "Dictionary" = "辭典"; +"Directly commit lowercased letters" = "直接遞交小寫字母"; +"Directly commit uppercased letters" = "直接遞交大寫字母"; "Emulating select-candidate-per-character mode" = "模擬 90 年代前期注音逐字選字輸入風格"; "Enable CNS11643 Support (2022-07-20)" = "啟用 CNS11643 全字庫支援 (2022-07-20)"; "Enable Space key for calling candidate window" = "敲空格鍵以呼出候選字窗"; @@ -143,8 +146,8 @@ "IBM" = "IBM 排列"; "in front of the phrase (like macOS built-in Zhuyin IME)" = "將游標置於詞語前方 // macOS 內建注音風格"; "Japanese" = "和語"; -"Keyboard" = "鍵盤"; "Keyboard Shortcuts:" = "鍵盤快速鍵:"; +"Keyboard" = "鍵盤"; "Misc Settings:" = "雜項:"; "MiTAC" = "神通排列"; "Non-QWERTY alphanumeral keyboard layouts are for Hanyu Pinyin parser only." = "QWERTY 以外的英數佈局是為了漢語拼音排列使用者而準備的。"; @@ -155,6 +158,7 @@ "Secondary Pinyin with Numeral Intonation" = "國音二式+數字標調"; "Seigyou" = "精業排列"; "Selection Keys:" = "選字鍵:"; +"Shift+Letter:" = "Shift+字母鍵:"; "Show Hanyu-Pinyin in the inline composition buffer & tooltip" = "拼音並擊(組字區與工具提示內顯示漢語拼音)"; "Show page buttons in candidate window" = "在選字窗內顯示翻頁按鈕"; "Simplified Chinese" = "簡體中文"; @@ -163,6 +167,7 @@ "Starlight" = "星光排列"; "Stop farting (when typed phonetic combination is invalid, etc.)" = "廉恥模式 // 取消勾選的話,敲錯字時會有異音"; "Traditional Chinese" = "繁體中文"; +"Type them into inline composition buffer" = "直接鍵入內文組字區"; "Typing Style:" = "輸入風格:"; "UI Language:" = "介面語言:"; "Universal Pinyin with Numeral Intonation" = "通用拼音+數字標調"; diff --git a/Source/UI/PrefUI/suiPrefPaneExperience.swift b/Source/UI/PrefUI/suiPrefPaneExperience.swift index ab8958c6..a95c7b57 100644 --- a/Source/UI/PrefUI/suiPrefPaneExperience.swift +++ b/Source/UI/PrefUI/suiPrefPaneExperience.swift @@ -37,6 +37,8 @@ struct suiPrefPaneExperience: View { forKey: UserDef.kKeepReadingUponCompositionError.rawValue) @State private var selTogglingAlphanumericalModeWithLShift = UserDefaults.standard.bool( forKey: UserDef.kTogglingAlphanumericalModeWithLShift.rawValue) + @State private var selUpperCaseLetterKeyBehavior = UserDefaults.standard.integer( + forKey: UserDef.kUpperCaseLetterKeyBehavior.rawValue) private let contentWidth: Double = { switch mgrPrefs.appleLanguages[0] { case "ja": @@ -123,6 +125,19 @@ struct suiPrefPaneExperience: View { Text(LocalizedStringKey("Choose the behavior of (Shift+)Space key with candidates.")) .preferenceDescription() } + Preferences.Section(label: { Text(LocalizedStringKey("Shift+Letter:")) }) { + Picker("", selection: $selUpperCaseLetterKeyBehavior) { + Text(LocalizedStringKey("Type them into inline composition buffer")).tag(0) + Text(LocalizedStringKey("Directly commit lowercased letters")).tag(1) + Text(LocalizedStringKey("Directly commit uppercased letters")).tag(2) + }.onChange(of: selUpperCaseLetterKeyBehavior) { value in + mgrPrefs.upperCaseLetterKeyBehavior = value + } + .labelsHidden() + .pickerStyle(RadioGroupPickerStyle()) + Text(LocalizedStringKey("Choose the behavior of Shift+Letter key with letter inputs.")) + .preferenceDescription() + } Preferences.Section(label: { Text(LocalizedStringKey("Misc Settings:")) }) { Toggle( LocalizedStringKey("Enable Space key for calling candidate window"), diff --git a/Source/WindowNIBs/Base.lproj/frmPrefWindow.xib b/Source/WindowNIBs/Base.lproj/frmPrefWindow.xib index 378e7c0f..4ba628a9 100644 --- a/Source/WindowNIBs/Base.lproj/frmPrefWindow.xib +++ b/Source/WindowNIBs/Base.lproj/frmPrefWindow.xib @@ -28,7 +28,7 @@ - + @@ -36,11 +36,11 @@ - + - + @@ -48,7 +48,7 @@ - + @@ -59,7 +59,7 @@ - + @@ -67,7 +67,7 @@ - + @@ -86,7 +86,7 @@ - + @@ -94,7 +94,7 @@ - + @@ -102,7 +102,7 @@ - + @@ -129,7 +129,7 @@ - + @@ -151,7 +151,10 @@ - + - + - - - + @@ -264,74 +256,97 @@ + + - - + + - - - + - - + - - + + + - - + + - - + + + + - - - + + + - + - - - + + + - - - + + + + - - + - + - + - + @@ -339,7 +354,7 @@ - + @@ -358,7 +373,7 @@ - + @@ -366,7 +381,7 @@ - + @@ -374,7 +389,7 @@ - + @@ -399,7 +414,7 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -435,16 +544,8 @@ - - - - - - - - - + @@ -468,100 +569,53 @@ - - - - - - - - - - - - - - - + - - - + + + + - + + @@ -574,16 +628,19 @@ + + + - + - + @@ -595,7 +652,7 @@ - + @@ -613,7 +670,7 @@ - + @@ -708,7 +765,7 @@