Repo // Conforming to the compatibility requirements of the Megrez 2.0.0 update.
This commit is contained in:
parent
741eee40c0
commit
7ba2983d1e
|
@ -38,13 +38,10 @@ public class KeyHandler {
|
||||||
/// 檢測是否內容為空(注拼槽與組字器都是空的)
|
/// 檢測是否內容為空(注拼槽與組字器都是空的)
|
||||||
var isTypingContentEmpty: Bool { composer.isEmpty && compositor.isEmpty }
|
var isTypingContentEmpty: Bool { composer.isEmpty && compositor.isEmpty }
|
||||||
|
|
||||||
/// 規定最大動態爬軌範圍。組字器內超出該範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。
|
|
||||||
let kMaxComposingBufferNeedsToWalkSize = Int(max(12, ceil(Double(mgrPrefs.composingBufferSize) / 2)))
|
|
||||||
var composer: Tekkon.Composer = .init() // 注拼槽
|
var composer: Tekkon.Composer = .init() // 注拼槽
|
||||||
var compositor: Megrez.Compositor // 組字器
|
var compositor: Megrez.Compositor // 組字器
|
||||||
var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組
|
var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組
|
||||||
var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組
|
var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組
|
||||||
var walkedAnchors: [Megrez.NodeAnchor] { compositor.walkedAnchors } // 用以記錄爬過的節錨的陣列
|
|
||||||
/// 委任物件 (ctlInputMethod),以便呼叫其中的函式。
|
/// 委任物件 (ctlInputMethod),以便呼叫其中的函式。
|
||||||
var delegate: KeyHandlerDelegate?
|
var delegate: KeyHandlerDelegate?
|
||||||
|
|
||||||
|
@ -72,7 +69,7 @@ public class KeyHandler {
|
||||||
/// 初期化。
|
/// 初期化。
|
||||||
public init() {
|
public init() {
|
||||||
/// 組字器初期化。因為是首次初期化變數,所以這裡不能用 ensureCompositor() 代勞。
|
/// 組字器初期化。因為是首次初期化變數,所以這裡不能用 ensureCompositor() 代勞。
|
||||||
compositor = Megrez.Compositor(lm: currentLM, separator: "-")
|
compositor = Megrez.Compositor(with: currentLM, separator: "-")
|
||||||
/// 注拼槽初期化。
|
/// 注拼槽初期化。
|
||||||
ensureParser()
|
ensureParser()
|
||||||
/// 讀取最近的簡繁體模式、且將該屬性內容塞到 inputMode 當中。
|
/// 讀取最近的簡繁體模式、且將該屬性內容塞到 inputMode 當中。
|
||||||
|
@ -91,7 +88,8 @@ public class KeyHandler {
|
||||||
///
|
///
|
||||||
/// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。
|
/// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。
|
||||||
var actualCandidateCursor: Int {
|
var actualCandidateCursor: Int {
|
||||||
mgrPrefs.useRearCursorMode ? min(compositor.cursor, compositor.length - 1) : max(compositor.cursor, 1)
|
compositor.cursor
|
||||||
|
- ((compositor.cursor == compositor.width || !mgrPrefs.useRearCursorMode) && compositor.cursor > 0 ? 1 : 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 利用給定的讀音鏈來試圖爬取最接近的組字結果(最大相似度估算)。
|
/// 利用給定的讀音鏈來試圖爬取最接近的組字結果(最大相似度估算)。
|
||||||
|
@ -116,23 +114,6 @@ public class KeyHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 在爬取組字結果之前,先將即將從組字區溢出的內容遞交出去。
|
|
||||||
///
|
|
||||||
/// 在理想狀況之下,組字區多長都無所謂。但是,Viterbi 演算法使用 O(N^2),
|
|
||||||
/// 會使得運算壓力隨著節錨數量的增加而增大。於是,有必要限定組字區的長度。
|
|
||||||
/// 超過該長度的內容會在爬軌之前先遞交出去,使其不再記入最大相似度估算的
|
|
||||||
/// 估算對象範圍。用比較形象且生動卻有點噁心的解釋的話,蒼蠅一邊吃一邊屙。
|
|
||||||
var commitOverflownCompositionAndWalk: String {
|
|
||||||
var textToCommit = ""
|
|
||||||
if compositor.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty {
|
|
||||||
let anchor: Megrez.NodeAnchor = walkedAnchors[0]
|
|
||||||
textToCommit = anchor.node.currentPair.value
|
|
||||||
compositor.removeHeadReadings(count: anchor.spanLength)
|
|
||||||
}
|
|
||||||
walk()
|
|
||||||
return textToCommit
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 用以組建聯想詞陣列的函式。
|
/// 用以組建聯想詞陣列的函式。
|
||||||
/// - Parameter key: 給定的聯想詞的開頭字。
|
/// - Parameter key: 給定的聯想詞的開頭字。
|
||||||
/// - Returns: 抓取到的聯想詞陣列。
|
/// - Returns: 抓取到的聯想詞陣列。
|
||||||
|
@ -151,106 +132,86 @@ public class KeyHandler {
|
||||||
/// - value: 給定之候選字字串。
|
/// - value: 給定之候選字字串。
|
||||||
/// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
/// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
||||||
func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) {
|
func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) {
|
||||||
let theCandidate: Megrez.KeyValuePaired = .init(key: candidate.0, value: candidate.1)
|
let actualCursor = actualCandidateCursor
|
||||||
let adjustedCursor = max(0, min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length))
|
let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1)
|
||||||
// 開始讓半衰模組觀察目前的狀況。
|
if !compositor.overrideCandidate(theCandidate, at: actualCursor) { return }
|
||||||
let selectedNode: Megrez.NodeAnchor = compositor.fixNodeWithCandidate(theCandidate, at: adjustedCursor)
|
|
||||||
// 不要針對逐字選字模式啟用臨時半衰記憶模型。
|
|
||||||
if !mgrPrefs.useSCPCTypingMode {
|
|
||||||
var addToUserOverrideModel = true
|
|
||||||
// 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。
|
|
||||||
if selectedNode.spanLength != theCandidate.value.count {
|
|
||||||
IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.")
|
|
||||||
addToUserOverrideModel = false
|
|
||||||
}
|
|
||||||
if addToUserOverrideModel {
|
|
||||||
// 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。
|
|
||||||
if selectedNode.node.scoreForPaired(candidate: theCandidate) <= -12 {
|
|
||||||
IME.prtDebugIntel("UOM: Score <= -12, dismissing.")
|
|
||||||
addToUserOverrideModel = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if addToUserOverrideModel, mgrPrefs.fetchSuggestionsFromUserOverrideModel {
|
|
||||||
IME.prtDebugIntel("UOM: Start Observation.")
|
|
||||||
// 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。
|
|
||||||
// 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。
|
|
||||||
// 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。
|
|
||||||
mgrPrefs.failureFlagForUOMObservation = true
|
|
||||||
// 令半衰記憶模組觀測給定的三元圖。
|
|
||||||
// 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。
|
|
||||||
currentUOM.observe(
|
|
||||||
walkedAnchors: walkedAnchors, cursorIndex: adjustedCursor, candidate: theCandidate.value,
|
|
||||||
timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() }
|
|
||||||
)
|
|
||||||
// 如果沒有出現崩框的話,那就將這個開關復位。
|
|
||||||
mgrPrefs.failureFlagForUOMObservation = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 開始爬軌。
|
// 開始爬軌。
|
||||||
walk()
|
walk()
|
||||||
|
|
||||||
|
// 在可行的情況下更新使用者半衰記憶模組。
|
||||||
|
var accumulatedCursor = 0
|
||||||
|
var currentNode: Megrez.Compositor.Node?
|
||||||
|
for node in compositor.walkedNodes {
|
||||||
|
accumulatedCursor += node.spanLength
|
||||||
|
if accumulatedCursor > actualCursor {
|
||||||
|
currentNode = node
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
guard let currentNode = currentNode else { return }
|
||||||
|
|
||||||
|
if currentNode.currentUnigram.score > -12 {
|
||||||
|
IME.prtDebugIntel("UOM: Start Observation.")
|
||||||
|
// 這個過程可能會因為使用者半衰記憶模組內部資料錯亂、而導致輸入法在選字時崩潰。
|
||||||
|
// 於是在這裡引入災後狀況察覺專用變數,且先開啟該開關。順利執行完觀察後會關閉。
|
||||||
|
// 一旦輸入法崩潰,會在重啟時發現這個開關是開著的,屆時 AppDelegate 會做出應對。
|
||||||
|
mgrPrefs.failureFlagForUOMObservation = true
|
||||||
|
// 令半衰記憶模組觀測給定的三元圖。
|
||||||
|
// 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。
|
||||||
|
currentUOM.observe(
|
||||||
|
walkedNodes: compositor.walkedNodes, cursorIndex: actualCursor, candidate: theCandidate.value,
|
||||||
|
timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() }
|
||||||
|
)
|
||||||
|
// 如果沒有出現崩框的話,那就將這個開關復位。
|
||||||
|
mgrPrefs.failureFlagForUOMObservation = false
|
||||||
|
}
|
||||||
|
|
||||||
/// 若偏好設定內啟用了相關選項,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
/// 若偏好設定內啟用了相關選項,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
||||||
if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing {
|
if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing {
|
||||||
|
// compositor.cursor = accumulatedCursor
|
||||||
compositor.jumpCursorBySpan(to: .front)
|
compositor.jumpCursorBySpan(to: .front)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 組字器內超出最大動態爬軌範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。
|
|
||||||
func markNodesFixedIfNecessary() {
|
|
||||||
let width = compositor.width
|
|
||||||
if width <= kMaxComposingBufferNeedsToWalkSize {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var index = 0
|
|
||||||
for anchor in walkedAnchors {
|
|
||||||
if index >= width - kMaxComposingBufferNeedsToWalkSize { break }
|
|
||||||
if anchor.node.score < Megrez.Node.kSelectedCandidateScore {
|
|
||||||
compositor.fixNodeWithCandidate(anchor.node.currentPair, at: index + anchor.spanLength)
|
|
||||||
}
|
|
||||||
index += anchor.spanLength
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 獲取候選字詞(包含讀音)陣列資料內容。
|
/// 獲取候選字詞(包含讀音)陣列資料內容。
|
||||||
func getCandidatesArray(fixOrder: Bool = true) -> [(String, String)] {
|
func getCandidatesArray(fixOrder: Bool = true) -> [(String, String)] {
|
||||||
var arrAnchors: [Megrez.NodeAnchor] = rawAnchorsOfNodes
|
/// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。
|
||||||
var arrCandidates: [Megrez.KeyValuePaired] = .init()
|
/// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。
|
||||||
|
var arrCandidates: [Megrez.Compositor.Candidate] = {
|
||||||
|
switch mgrPrefs.useRearCursorMode {
|
||||||
|
case false:
|
||||||
|
return compositor.fetchCandidates(at: actualCandidateCursor, filter: .endAt)
|
||||||
|
case true:
|
||||||
|
return compositor.fetchCandidates(at: actualCandidateCursor, filter: .beginAt)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
/// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。
|
/// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。
|
||||||
/// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。
|
/// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。
|
||||||
/// 這個過程不會傷到子陣列內部的排序。
|
/// 這個過程不會傷到子陣列內部的排序。
|
||||||
if arrAnchors.isEmpty { return .init() }
|
if arrCandidates.isEmpty { return .init() }
|
||||||
|
|
||||||
// 讓更長的節錨排序靠前。
|
|
||||||
arrAnchors = arrAnchors.stableSort { $0.spanLength > $1.spanLength }
|
|
||||||
|
|
||||||
// 將節錨內的候選字詞資料拓印到輸出陣列內。
|
|
||||||
for currentCandidate in arrAnchors.map(\.node.candidates).joined() {
|
|
||||||
// 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。
|
|
||||||
// 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。
|
|
||||||
// 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。
|
|
||||||
arrCandidates.append(currentCandidate)
|
|
||||||
}
|
|
||||||
// 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。
|
// 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。
|
||||||
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder {
|
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder {
|
||||||
return arrCandidates.map { ($0.key, $0.value) }
|
return arrCandidates.map { ($0.key, $0.value) }
|
||||||
}
|
}
|
||||||
|
|
||||||
let arrSuggestedUnigrams: [Megrez.Unigram] = fetchSuggestedCandidates().stableSort { $0.score > $1.score }
|
let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestedCandidates()
|
||||||
let arrSuggestedCandidates: [Megrez.KeyValuePaired] = arrSuggestedUnigrams.map(\.keyValue)
|
let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map {
|
||||||
|
Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value)
|
||||||
|
}
|
||||||
arrCandidates = arrSuggestedCandidates.filter { arrCandidates.contains($0) } + arrCandidates
|
arrCandidates = arrSuggestedCandidates.filter { arrCandidates.contains($0) } + arrCandidates
|
||||||
arrCandidates = arrCandidates.deduplicate
|
arrCandidates = arrCandidates.deduplicate
|
||||||
arrCandidates = arrCandidates.stableSort { $0.key.split(separator: "-").count > $1.key.split(separator: "-").count }
|
arrCandidates = arrCandidates.stableSort { $0.key.split(separator: "-").count > $1.key.split(separator: "-").count }
|
||||||
return arrCandidates.map { ($0.key, $0.value) }
|
return arrCandidates.map { ($0.key, $0.value) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列。
|
/// 向半衰引擎詢問可能的選字建議。拿到的結果會是一個單元圖陣列,會自動按權重排序。
|
||||||
func fetchSuggestedCandidates() -> [Megrez.Unigram] {
|
func fetchSuggestedCandidates() -> [(String, Megrez.Unigram)] {
|
||||||
currentUOM.suggest(
|
currentUOM.suggest(
|
||||||
walkedAnchors: walkedAnchors, cursorIndex: compositor.cursor,
|
walkedNodes: compositor.walkedNodes, cursorIndex: compositor.cursor,
|
||||||
timestamp: NSDate().timeIntervalSince1970
|
timestamp: NSDate().timeIntervalSince1970
|
||||||
)
|
).stableSort { $0.1.score > $1.1.score }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 向半衰引擎詢問可能的選字建議、且套用給組字器內的當前游標位置。
|
/// 向半衰引擎詢問可能的選字建議、且套用給組字器內的當前游標位置。
|
||||||
|
@ -260,31 +221,19 @@ public class KeyHandler {
|
||||||
/// 如果這個開關沒打開的話,直接放棄執行這個函式。
|
/// 如果這個開關沒打開的話,直接放棄執行這個函式。
|
||||||
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return }
|
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return }
|
||||||
/// 先就當前上下文讓半衰引擎重新生成三元圖索引鍵。
|
/// 先就當前上下文讓半衰引擎重新生成三元圖索引鍵。
|
||||||
let overrideValue = fetchSuggestedCandidates().first?.keyValue.value ?? ""
|
let overrideValue = fetchSuggestedCandidates().first?.1.value ?? ""
|
||||||
|
|
||||||
/// 再拿著索引鍵去問半衰模組有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。
|
/// 再拿著索引鍵去問半衰模組有沒有選字建議。有的話就遵循之、讓天權星引擎對指定節錨下的節點複寫權重。
|
||||||
if !overrideValue.isEmpty {
|
if !overrideValue.isEmpty {
|
||||||
IME.prtDebugIntel(
|
IME.prtDebugIntel(
|
||||||
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
|
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
|
||||||
compositor.overrideNodeScoreForSelectedCandidate(
|
// TODO: 這裡回頭改成用詞音配對來覆寫的形式。
|
||||||
location: min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length),
|
compositor.overrideCandidateLiteral(overrideValue, at: actualCandidateCursor, overrideType: .withTopUnigramScore)
|
||||||
value: overrideValue,
|
|
||||||
overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon)
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.")
|
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 就給定的節錨陣列,根據半衰模組的衰減指數,來找出最高權重數值。
|
|
||||||
/// - Parameters:
|
|
||||||
/// - nodes: 給定的節錨陣列。
|
|
||||||
/// - epsilon: 半衰模組的衰減指數。
|
|
||||||
/// - Returns: 尋獲的最高權重數值。
|
|
||||||
func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double {
|
|
||||||
nodeAnchors.map(\.node.highestUnigramScore).max() ?? 0 + epsilon
|
|
||||||
}
|
|
||||||
|
|
||||||
// MARK: - Extracted methods and functions (Tekkon).
|
// MARK: - Extracted methods and functions (Tekkon).
|
||||||
|
|
||||||
/// 獲取與當前注音排列或拼音輸入種類有關的標點索引鍵,以英數下畫線「_」結尾。
|
/// 獲取與當前注音排列或拼音輸入種類有關的標點索引鍵,以英數下畫線「_」結尾。
|
||||||
|
@ -335,15 +284,6 @@ public class KeyHandler {
|
||||||
|
|
||||||
// MARK: - Extracted methods and functions (Megrez).
|
// MARK: - Extracted methods and functions (Megrez).
|
||||||
|
|
||||||
/// 獲取原始節錨資料陣列。
|
|
||||||
var rawAnchorsOfNodes: [Megrez.NodeAnchor] {
|
|
||||||
/// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。
|
|
||||||
/// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。
|
|
||||||
mgrPrefs.useRearCursorMode
|
|
||||||
? compositor.nodesBeginningAt(location: actualCandidateCursor)
|
|
||||||
: compositor.nodesEndingAt(location: actualCandidateCursor)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 將輸入法偏好設定同步至語言模組內。
|
/// 將輸入法偏好設定同步至語言模組內。
|
||||||
func syncBaseLMPrefs() {
|
func syncBaseLMPrefs() {
|
||||||
currentLM.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled
|
currentLM.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled
|
||||||
|
@ -354,7 +294,7 @@ public class KeyHandler {
|
||||||
/// 令組字器重新初期化,使其與被重新指派過的主語言模組對接。
|
/// 令組字器重新初期化,使其與被重新指派過的主語言模組對接。
|
||||||
func ensureCompositor() {
|
func ensureCompositor() {
|
||||||
// 每個漢字讀音都由一個西文半形減號分隔開。
|
// 每個漢字讀音都由一個西文半形減號分隔開。
|
||||||
compositor = Megrez.Compositor(lm: currentLM, separator: "-")
|
compositor = Megrez.Compositor(with: currentLM, separator: "-")
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 生成標點符號索引鍵。
|
/// 生成標點符號索引鍵。
|
||||||
|
|
|
@ -81,23 +81,19 @@ extension KeyHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 將該讀音插入至組字器內的軌格當中。
|
// 將該讀音插入至組字器內的軌格當中。
|
||||||
compositor.insertReading(readingKey)
|
compositor.insertKey(readingKey)
|
||||||
|
|
||||||
// 讓組字器反爬軌格。
|
// 讓組字器反爬軌格。
|
||||||
let textToCommit = commitOverflownCompositionAndWalk
|
walk()
|
||||||
|
|
||||||
// 看看半衰記憶模組是否會對目前的狀態給出自動選字建議。
|
// 看看半衰記憶模組是否會對目前的狀態給出自動選字建議。
|
||||||
fetchAndApplySuggestionsFromUserOverrideModel()
|
fetchAndApplySuggestionsFromUserOverrideModel()
|
||||||
|
|
||||||
// 將組字器內超出最大動態爬軌範圍的節錨都標記為「已經手動選字過」,減少之後的爬軌運算負擔。
|
|
||||||
markNodesFixedIfNecessary()
|
|
||||||
|
|
||||||
// 之後就是更新組字區了。先清空注拼槽的內容。
|
// 之後就是更新組字區了。先清空注拼槽的內容。
|
||||||
composer.clear()
|
composer.clear()
|
||||||
|
|
||||||
// 再以回呼組字狀態的方式來執行 updateClientComposingBuffer()。
|
// 再以回呼組字狀態的方式來執行 updateClientComposingBuffer()。
|
||||||
let inputting = buildInputtingState
|
let inputting = buildInputtingState
|
||||||
inputting.textToCommit = textToCommit
|
|
||||||
stateCallback(inputting)
|
stateCallback(inputting)
|
||||||
|
|
||||||
/// 逐字選字模式的處理。
|
/// 逐字選字模式的處理。
|
||||||
|
@ -106,9 +102,9 @@ extension KeyHandler {
|
||||||
state: inputting,
|
state: inputting,
|
||||||
isTypingVertical: input.isTypingVertical
|
isTypingVertical: input.isTypingVertical
|
||||||
)
|
)
|
||||||
if choosingCandidates.candidates.count == 1 {
|
if choosingCandidates.candidates.count == 1, let firstCandidate = choosingCandidates.candidates.first {
|
||||||
let reading: String = choosingCandidates.candidates.first?.0 ?? ""
|
let reading: String = firstCandidate.0
|
||||||
let text: String = choosingCandidates.candidates.first?.1 ?? ""
|
let text: String = firstCandidate.1
|
||||||
stateCallback(InputState.Committing(textToCommit: text))
|
stateCallback(InputState.Committing(textToCommit: text))
|
||||||
|
|
||||||
if !mgrPrefs.associatedPhrasesEnabled {
|
if !mgrPrefs.associatedPhrasesEnabled {
|
||||||
|
|
|
@ -163,10 +163,9 @@ extension KeyHandler {
|
||||||
stateCallback(InputState.Committing(textToCommit: " "))
|
stateCallback(InputState.Committing(textToCommit: " "))
|
||||||
stateCallback(InputState.Empty())
|
stateCallback(InputState.Empty())
|
||||||
} else if currentLM.hasUnigramsFor(key: " ") {
|
} else if currentLM.hasUnigramsFor(key: " ") {
|
||||||
compositor.insertReading(" ")
|
compositor.insertKey(" ")
|
||||||
let textToCommit = commitOverflownCompositionAndWalk
|
walk()
|
||||||
let inputting = buildInputtingState
|
let inputting = buildInputtingState
|
||||||
inputting.textToCommit = textToCommit
|
|
||||||
stateCallback(inputting)
|
stateCallback(inputting)
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
|
@ -283,10 +282,9 @@ extension KeyHandler {
|
||||||
if input.isOptionHold {
|
if input.isOptionHold {
|
||||||
if currentLM.hasUnigramsFor(key: "_punctuation_list") {
|
if currentLM.hasUnigramsFor(key: "_punctuation_list") {
|
||||||
if composer.isEmpty {
|
if composer.isEmpty {
|
||||||
compositor.insertReading("_punctuation_list")
|
compositor.insertKey("_punctuation_list")
|
||||||
let textToCommit: String! = commitOverflownCompositionAndWalk
|
walk()
|
||||||
let inputting = buildInputtingState
|
let inputting = buildInputtingState
|
||||||
inputting.textToCommit = textToCommit
|
|
||||||
stateCallback(inputting)
|
stateCallback(inputting)
|
||||||
stateCallback(buildCandidate(state: inputting, isTypingVertical: input.isTypingVertical))
|
stateCallback(buildCandidate(state: inputting, isTypingVertical: input.isTypingVertical))
|
||||||
} else { // 不要在注音沒敲完整的情況下叫出統合符號選單。
|
} else { // 不要在注音沒敲完整的情況下叫出統合符號選單。
|
||||||
|
|
|
@ -22,21 +22,20 @@ extension KeyHandler {
|
||||||
/// 「更新內文組字區 (Update the composing buffer)」是指要求客體軟體將組字緩衝區的內容
|
/// 「更新內文組字區 (Update the composing buffer)」是指要求客體軟體將組字緩衝區的內容
|
||||||
/// 換成由此處重新生成的組字字串(NSAttributeString,否則會不顯示)。
|
/// 換成由此處重新生成的組字字串(NSAttributeString,否則會不顯示)。
|
||||||
var tooltipParameterRef: [String] = ["", ""]
|
var tooltipParameterRef: [String] = ["", ""]
|
||||||
let nodeValuesArray: [String] = walkedAnchors.values
|
let nodeValuesArray: [String] = compositor.walkedNodes.values
|
||||||
var composedStringCursorIndex = 0
|
var composedStringCursorIndex = 0
|
||||||
var readingCursorIndex = 0
|
var readingCursorIndex = 0
|
||||||
/// IMK 協定的內文組字區的游標長度與游標位置無法正確統計 UTF8 高萬字(比如 emoji)的長度,
|
/// IMK 協定的內文組字區的游標長度與游標位置無法正確統計 UTF8 高萬字(比如 emoji)的長度,
|
||||||
/// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。
|
/// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。
|
||||||
/// 這樣就可以免除不必要的類型轉換。
|
/// 這樣就可以免除不必要的類型轉換。
|
||||||
for theAnchor in walkedAnchors {
|
for theNode in compositor.walkedNodes {
|
||||||
let theNode = theAnchor.node
|
let strNodeValue = theNode.value
|
||||||
let strNodeValue = theNode.currentPair.value
|
|
||||||
let arrSplit: [String] = Array(strNodeValue).map { String($0) }
|
let arrSplit: [String] = Array(strNodeValue).map { String($0) }
|
||||||
let codepointCount = arrSplit.count
|
let codepointCount = arrSplit.count
|
||||||
/// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。
|
/// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。
|
||||||
/// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來
|
/// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來
|
||||||
/// 累加、以此為依據,來校正「可見游標位置」。
|
/// 累加、以此為依據,來校正「可見游標位置」。
|
||||||
let spanningLength: Int = theAnchor.spanLength
|
let spanningLength: Int = theNode.spanLength
|
||||||
if readingCursorIndex + spanningLength <= compositor.cursor {
|
if readingCursorIndex + spanningLength <= compositor.cursor {
|
||||||
composedStringCursorIndex += strNodeValue.utf16.count
|
composedStringCursorIndex += strNodeValue.utf16.count
|
||||||
readingCursorIndex += spanningLength
|
readingCursorIndex += spanningLength
|
||||||
|
@ -60,14 +59,14 @@ extension KeyHandler {
|
||||||
/// 所以需要上下文工具提示來顯示游標的相對位置。
|
/// 所以需要上下文工具提示來顯示游標的相對位置。
|
||||||
/// 這裡先計算一下要用在工具提示當中的顯示參數的內容。
|
/// 這裡先計算一下要用在工具提示當中的顯示參數的內容。
|
||||||
switch compositor.cursor {
|
switch compositor.cursor {
|
||||||
case compositor.readings.count...:
|
case compositor.keys.count...:
|
||||||
// 這裡的 compositor.cursor 數值不可能大於 readings.count,因為會被 Megrez 自動糾正。
|
// 這裡的 compositor.cursor 數值不可能大於 readings.count,因為會被 Megrez 自動糾正。
|
||||||
tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1]
|
tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1]
|
||||||
case 0:
|
case 0:
|
||||||
tooltipParameterRef[1] = compositor.readings[compositor.cursor]
|
tooltipParameterRef[1] = compositor.keys[compositor.cursor]
|
||||||
default:
|
default:
|
||||||
tooltipParameterRef[0] = compositor.readings[compositor.cursor - 1]
|
tooltipParameterRef[0] = compositor.keys[compositor.cursor - 1]
|
||||||
tooltipParameterRef[1] = compositor.readings[compositor.cursor]
|
tooltipParameterRef[1] = compositor.keys[compositor.cursor]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -125,7 +124,7 @@ extension KeyHandler {
|
||||||
cursorIndex: currentState.cursorIndex,
|
cursorIndex: currentState.cursorIndex,
|
||||||
candidates: getCandidatesArray(fixOrder: mgrPrefs.useFixecCandidateOrderOnSelection),
|
candidates: getCandidatesArray(fixOrder: mgrPrefs.useFixecCandidateOrderOnSelection),
|
||||||
isTypingVertical: isTypingVertical,
|
isTypingVertical: isTypingVertical,
|
||||||
nodeValuesArray: walkedAnchors.values
|
nodeValuesArray: compositor.walkedNodes.values
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,7 +214,7 @@ extension KeyHandler {
|
||||||
cursorIndex: state.cursorIndex,
|
cursorIndex: state.cursorIndex,
|
||||||
markerIndex: index,
|
markerIndex: index,
|
||||||
readings: state.readings,
|
readings: state.readings,
|
||||||
nodeValuesArray: walkedAnchors.values
|
nodeValuesArray: compositor.walkedNodes.values
|
||||||
)
|
)
|
||||||
marking.tooltipForInputting = state.tooltipForInputting
|
marking.tooltipForInputting = state.tooltipForInputting
|
||||||
stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking)
|
stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking)
|
||||||
|
@ -237,7 +236,7 @@ extension KeyHandler {
|
||||||
cursorIndex: state.cursorIndex,
|
cursorIndex: state.cursorIndex,
|
||||||
markerIndex: index,
|
markerIndex: index,
|
||||||
readings: state.readings,
|
readings: state.readings,
|
||||||
nodeValuesArray: walkedAnchors.values
|
nodeValuesArray: compositor.walkedNodes.values
|
||||||
)
|
)
|
||||||
marking.tooltipForInputting = state.tooltipForInputting
|
marking.tooltipForInputting = state.tooltipForInputting
|
||||||
stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking)
|
stateCallback(marking.markedRange.isEmpty ? marking.convertedToInputting : marking)
|
||||||
|
@ -280,10 +279,9 @@ extension KeyHandler {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
compositor.insertReading(customPunctuation)
|
compositor.insertKey(customPunctuation)
|
||||||
let textToCommit = commitOverflownCompositionAndWalk
|
walk()
|
||||||
let inputting = buildInputtingState
|
let inputting = buildInputtingState
|
||||||
inputting.textToCommit = textToCommit
|
|
||||||
stateCallback(inputting)
|
stateCallback(inputting)
|
||||||
|
|
||||||
// 從這一行之後開始,就是針對逐字選字模式的單獨處理。
|
// 從這一行之後開始,就是針對逐字選字模式的單獨處理。
|
||||||
|
@ -338,7 +336,7 @@ extension KeyHandler {
|
||||||
) -> Bool {
|
) -> Bool {
|
||||||
guard state is InputState.Inputting else { return false }
|
guard state is InputState.Inputting else { return false }
|
||||||
|
|
||||||
var composingBuffer = compositor.readings.joined(separator: "-")
|
var composingBuffer = compositor.keys.joined(separator: "-")
|
||||||
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
||||||
composingBuffer = Tekkon.restoreToneOneInZhuyinKey(target: composingBuffer) // 恢復陰平標記
|
composingBuffer = Tekkon.restoreToneOneInZhuyinKey(target: composingBuffer) // 恢復陰平標記
|
||||||
composingBuffer = Tekkon.cnvPhonaToHanyuPinyin(target: composingBuffer) // 注音轉拼音
|
composingBuffer = Tekkon.cnvPhonaToHanyuPinyin(target: composingBuffer) // 注音轉拼音
|
||||||
|
@ -368,7 +366,7 @@ extension KeyHandler {
|
||||||
|
|
||||||
var composed = ""
|
var composed = ""
|
||||||
|
|
||||||
for node in walkedAnchors.map(\.node) {
|
for node in compositor.walkedNodes {
|
||||||
var key = node.key
|
var key = node.key
|
||||||
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
||||||
key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記
|
key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記
|
||||||
|
@ -379,7 +377,7 @@ extension KeyHandler {
|
||||||
key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
|
key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
let value = node.currentPair.value
|
let value = node.value
|
||||||
// 不要給標點符號等特殊元素加注音
|
// 不要給標點符號等特殊元素加注音
|
||||||
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
|
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
|
||||||
}
|
}
|
||||||
|
@ -416,7 +414,7 @@ extension KeyHandler {
|
||||||
composer.clear()
|
composer.clear()
|
||||||
} else if composer.isEmpty {
|
} else if composer.isEmpty {
|
||||||
if compositor.cursor > 0 {
|
if compositor.cursor > 0 {
|
||||||
compositor.dropReading(direction: .rear)
|
compositor.dropKey(direction: .rear)
|
||||||
walk()
|
walk()
|
||||||
} else {
|
} else {
|
||||||
IME.prtDebugIntel("9D69908D")
|
IME.prtDebugIntel("9D69908D")
|
||||||
|
@ -468,7 +466,7 @@ extension KeyHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
if composer.isEmpty {
|
if composer.isEmpty {
|
||||||
compositor.dropReading(direction: .front)
|
compositor.dropKey(direction: .front)
|
||||||
walk()
|
walk()
|
||||||
} else {
|
} else {
|
||||||
composer.clear()
|
composer.clear()
|
||||||
|
@ -640,7 +638,7 @@ extension KeyHandler {
|
||||||
composingBuffer: currentState.composingBuffer,
|
composingBuffer: currentState.composingBuffer,
|
||||||
cursorIndex: currentState.cursorIndex,
|
cursorIndex: currentState.cursorIndex,
|
||||||
markerIndex: nextPosition,
|
markerIndex: nextPosition,
|
||||||
readings: compositor.readings
|
readings: compositor.keys
|
||||||
)
|
)
|
||||||
marking.tooltipForInputting = currentState.tooltip
|
marking.tooltipForInputting = currentState.tooltip
|
||||||
stateCallback(marking)
|
stateCallback(marking)
|
||||||
|
@ -714,7 +712,7 @@ extension KeyHandler {
|
||||||
composingBuffer: currentState.composingBuffer,
|
composingBuffer: currentState.composingBuffer,
|
||||||
cursorIndex: currentState.cursorIndex,
|
cursorIndex: currentState.cursorIndex,
|
||||||
markerIndex: previousPosition,
|
markerIndex: previousPosition,
|
||||||
readings: compositor.readings
|
readings: compositor.keys
|
||||||
)
|
)
|
||||||
marking.tooltipForInputting = currentState.tooltip
|
marking.tooltipForInputting = currentState.tooltip
|
||||||
stateCallback(marking)
|
stateCallback(marking)
|
||||||
|
@ -770,7 +768,7 @@ extension KeyHandler {
|
||||||
stateCallback: @escaping (InputStateProtocol) -> Void,
|
stateCallback: @escaping (InputStateProtocol) -> Void,
|
||||||
errorCallback: @escaping () -> Void
|
errorCallback: @escaping () -> Void
|
||||||
) -> Bool {
|
) -> Bool {
|
||||||
if composer.isEmpty, compositor.isEmpty || walkedAnchors.isEmpty { return false }
|
if composer.isEmpty, compositor.isEmpty || compositor.walkedNodes.isEmpty { return false }
|
||||||
guard state is InputState.Inputting else {
|
guard state is InputState.Inputting else {
|
||||||
guard state is InputState.Empty else {
|
guard state is InputState.Empty else {
|
||||||
IME.prtDebugIntel("6044F081")
|
IME.prtDebugIntel("6044F081")
|
||||||
|
@ -795,24 +793,27 @@ extension KeyHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
var length = 0
|
var length = 0
|
||||||
var currentAnchor = Megrez.NodeAnchor()
|
var currentNode: Megrez.Compositor.Node?
|
||||||
let cursorIndex = min(
|
let cursorIndex = actualCandidateCursor
|
||||||
actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositor.length
|
for node in compositor.walkedNodes {
|
||||||
)
|
length += node.spanLength
|
||||||
for anchor in walkedAnchors {
|
if length > cursorIndex {
|
||||||
length += anchor.spanLength
|
currentNode = node
|
||||||
if length >= cursorIndex {
|
|
||||||
currentAnchor = anchor
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let currentNode = currentAnchor.node
|
guard let currentNode = currentNode else {
|
||||||
let currentPaired: Megrez.KeyValuePaired = currentNode.currentPair
|
IME.prtDebugIntel("F58DEA95")
|
||||||
|
errorCallback()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
let currentPaired = (currentNode.key, currentNode.value)
|
||||||
|
|
||||||
var currentIndex = 0
|
var currentIndex = 0
|
||||||
if currentNode.score < Megrez.Node.kSelectedCandidateScore {
|
if !currentNode.isOverriden {
|
||||||
/// 只要是沒有被使用者手動選字過的(節錨下的)節點,
|
/// 如果是沒有被使用者手動選字過的(節錨下的)節點,
|
||||||
/// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配
|
/// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配
|
||||||
/// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個
|
/// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個
|
||||||
/// 注音讀音、卻發現這兩個注音讀音各自的單字權重遠高於由這兩個
|
/// 注音讀音、卻發現這兩個注音讀音各自的單字權重遠高於由這兩個
|
||||||
|
@ -821,14 +822,14 @@ extension KeyHandler {
|
||||||
/// (預設情況下是 (Shift+)Tab 來做正 (反) 向切換,但也可以用
|
/// (預設情況下是 (Shift+)Tab 來做正 (反) 向切換,但也可以用
|
||||||
/// Shift(+CMD)+Space 或 Alt+↑/↓ 來切換(縱排輸入時則是 Alt+←/→)、
|
/// Shift(+CMD)+Space 或 Alt+↑/↓ 來切換(縱排輸入時則是 Alt+←/→)、
|
||||||
/// 以應對臉書綁架 Tab 鍵的情況。
|
/// 以應對臉書綁架 Tab 鍵的情況。
|
||||||
if candidates[0].0 == currentPaired.key, candidates[0].1 == currentPaired.value {
|
if candidates[0] == currentPaired {
|
||||||
/// 如果第一個候選字詞是當前節點的候選字詞的值的話,
|
/// 如果第一個候選字詞是當前節點的候選字詞的值的話,
|
||||||
/// 那就切到下一個(或上一個,也就是最後一個)候選字詞。
|
/// 那就切到下一個(或上一個,也就是最後一個)候選字詞。
|
||||||
currentIndex = reverseModifier ? candidates.count - 1 : 1
|
currentIndex = reverseModifier ? candidates.count - 1 : 1
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for candidate in candidates {
|
for candidate in candidates {
|
||||||
if candidate.0 == currentPaired.key, candidate.1 == currentPaired.value {
|
if candidate == currentPaired {
|
||||||
if reverseModifier {
|
if reverseModifier {
|
||||||
if currentIndex == 0 {
|
if currentIndex == 0 {
|
||||||
currentIndex = candidates.count - 1
|
currentIndex = candidates.count - 1
|
||||||
|
|
|
@ -175,14 +175,8 @@ extension vChewing {
|
||||||
/// - Parameter key: 給定的讀音字串。
|
/// - Parameter key: 給定的讀音字串。
|
||||||
/// - Returns: 對應的經過處理的單元圖陣列。
|
/// - Returns: 對應的經過處理的單元圖陣列。
|
||||||
public func unigramsFor(key: String) -> [Megrez.Unigram] {
|
public func unigramsFor(key: String) -> [Megrez.Unigram] {
|
||||||
if key == " " {
|
/// 給空格鍵指定輸出值。
|
||||||
/// 給空格鍵指定輸出值。
|
if key == " " { return [.init(value: " ")] }
|
||||||
let spaceUnigram = Megrez.Unigram(
|
|
||||||
keyValue: Megrez.KeyValuePaired(key: " ", value: " "),
|
|
||||||
score: 0
|
|
||||||
)
|
|
||||||
return [spaceUnigram]
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。
|
/// 準備不同的語言模組容器,開始逐漸往容器陣列內塞入資料。
|
||||||
var rawAllUnigrams: [Megrez.Unigram] = []
|
var rawAllUnigrams: [Megrez.Unigram] = []
|
||||||
|
@ -209,11 +203,11 @@ extension vChewing {
|
||||||
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: key))
|
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: key))
|
||||||
|
|
||||||
// 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。
|
// 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。
|
||||||
var filteredPairs: Set<Megrez.KeyValuePaired> = []
|
var filteredPairs: Set<String> = []
|
||||||
|
|
||||||
// 載入要過濾的 KeyValuePair 清單。
|
// 載入要過濾的 KeyValuePair 清單。
|
||||||
for unigram in lmFiltered.unigramsFor(key: key) {
|
for unigram in lmFiltered.unigramsFor(key: key) {
|
||||||
filteredPairs.insert(unigram.keyValue)
|
filteredPairs.insert(unigram.value)
|
||||||
}
|
}
|
||||||
|
|
||||||
return filterAndTransform(
|
return filterAndTransform(
|
||||||
|
@ -243,9 +237,6 @@ extension vChewing {
|
||||||
lmAssociates.hasValuesFor(pair: pair)
|
lmAssociates.hasValuesFor(pair: pair)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。
|
|
||||||
public func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
|
|
||||||
|
|
||||||
// MARK: - 核心函式(對內)
|
// MARK: - 核心函式(對內)
|
||||||
|
|
||||||
/// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。
|
/// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。
|
||||||
|
@ -255,20 +246,20 @@ extension vChewing {
|
||||||
/// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列。
|
/// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列。
|
||||||
func filterAndTransform(
|
func filterAndTransform(
|
||||||
unigrams: [Megrez.Unigram],
|
unigrams: [Megrez.Unigram],
|
||||||
filter filteredPairs: Set<Megrez.KeyValuePaired>
|
filter filteredPairs: Set<String>
|
||||||
) -> [Megrez.Unigram] {
|
) -> [Megrez.Unigram] {
|
||||||
var results: [Megrez.Unigram] = []
|
var results: [Megrez.Unigram] = []
|
||||||
var insertedPairs: Set<Megrez.KeyValuePaired> = []
|
var insertedPairs: Set<String> = []
|
||||||
for unigram in unigrams {
|
for unigram in unigrams {
|
||||||
var pair: Megrez.KeyValuePaired = unigram.keyValue
|
var theValue: String = unigram.value
|
||||||
if filteredPairs.contains(pair) { continue }
|
if filteredPairs.contains(theValue) { continue }
|
||||||
if isPhraseReplacementEnabled {
|
if isPhraseReplacementEnabled {
|
||||||
let replacement = lmReplacements.valuesFor(key: pair.value)
|
let replacement = lmReplacements.valuesFor(key: theValue)
|
||||||
if !replacement.isEmpty { pair.value = replacement }
|
if !replacement.isEmpty { theValue = replacement }
|
||||||
}
|
}
|
||||||
if insertedPairs.contains(pair) { continue }
|
if insertedPairs.contains(theValue) { continue }
|
||||||
results.append(Megrez.Unigram(keyValue: pair, score: unigram.score))
|
results.append(Megrez.Unigram(value: theValue, score: unigram.score))
|
||||||
insertedPairs.insert(pair)
|
insertedPairs.insert(theValue)
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,9 +31,9 @@ extension vChewing.LMInstantiator {
|
||||||
var date3 = ChineseConverter.convertArabicNumeralsToChinese(target: date2)
|
var date3 = ChineseConverter.convertArabicNumeralsToChinese(target: date2)
|
||||||
date3 = date3.replacingOccurrences(of: "年〇", with: "年")
|
date3 = date3.replacingOccurrences(of: "年〇", with: "年")
|
||||||
date3 = date3.replacingOccurrences(of: "月〇", with: "月")
|
date3 = date3.replacingOccurrences(of: "月〇", with: "月")
|
||||||
results.append(.init(keyValue: .init(key: key, value: date1), score: -94))
|
results.append(.init(value: date1, score: -94))
|
||||||
results.append(.init(keyValue: .init(key: key, value: date2), score: -95))
|
results.append(.init(value: date2, score: -95))
|
||||||
results.append(.init(keyValue: .init(key: key, value: date3), score: -96))
|
results.append(.init(value: date3, score: -96))
|
||||||
if let currentDateShortened = currentDateShortened, delta.year != 0 {
|
if let currentDateShortened = currentDateShortened, delta.year != 0 {
|
||||||
var dateAlt1: String = formatterDate1.string(from: currentDateShortened)
|
var dateAlt1: String = formatterDate1.string(from: currentDateShortened)
|
||||||
dateAlt1.regReplace(pattern: #"^0+"#)
|
dateAlt1.regReplace(pattern: #"^0+"#)
|
||||||
|
@ -42,9 +42,9 @@ extension vChewing.LMInstantiator {
|
||||||
var dateAlt3 = ChineseConverter.convertArabicNumeralsToChinese(target: dateAlt2)
|
var dateAlt3 = ChineseConverter.convertArabicNumeralsToChinese(target: dateAlt2)
|
||||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年")
|
dateAlt3 = dateAlt3.replacingOccurrences(of: "年〇", with: "年")
|
||||||
dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月")
|
dateAlt3 = dateAlt3.replacingOccurrences(of: "月〇", with: "月")
|
||||||
results.append(.init(keyValue: .init(key: key, value: dateAlt1), score: -97))
|
results.append(.init(value: dateAlt1, score: -97))
|
||||||
results.append(.init(keyValue: .init(key: key, value: dateAlt2), score: -98))
|
results.append(.init(value: dateAlt2, score: -98))
|
||||||
results.append(.init(keyValue: .init(key: key, value: dateAlt3), score: -99))
|
results.append(.init(value: dateAlt3, score: -99))
|
||||||
}
|
}
|
||||||
case "ㄕˊ-ㄐㄧㄢ":
|
case "ㄕˊ-ㄐㄧㄢ":
|
||||||
let formatterTime1 = DateFormatter()
|
let formatterTime1 = DateFormatter()
|
||||||
|
@ -56,9 +56,9 @@ extension vChewing.LMInstantiator {
|
||||||
let time1 = formatterTime1.string(from: currentDate)
|
let time1 = formatterTime1.string(from: currentDate)
|
||||||
let time2 = formatterTime2.string(from: currentDate)
|
let time2 = formatterTime2.string(from: currentDate)
|
||||||
let time3 = formatterTime3.string(from: currentDate)
|
let time3 = formatterTime3.string(from: currentDate)
|
||||||
results.append(.init(keyValue: .init(key: key, value: time1), score: -97))
|
results.append(.init(value: time1, score: -97))
|
||||||
results.append(.init(keyValue: .init(key: key, value: time2), score: -98))
|
results.append(.init(value: time2, score: -98))
|
||||||
results.append(.init(keyValue: .init(key: key, value: time3), score: -99))
|
results.append(.init(value: time3, score: -99))
|
||||||
case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ":
|
case "ㄒㄧㄥ-ㄑㄧ", "ㄒㄧㄥ-ㄑㄧˊ":
|
||||||
let formatterWeek1 = DateFormatter()
|
let formatterWeek1 = DateFormatter()
|
||||||
let formatterWeek2 = DateFormatter()
|
let formatterWeek2 = DateFormatter()
|
||||||
|
@ -68,8 +68,8 @@ extension vChewing.LMInstantiator {
|
||||||
formatterWeek2.locale = theLocale
|
formatterWeek2.locale = theLocale
|
||||||
let week1 = formatterWeek1.string(from: currentDate)
|
let week1 = formatterWeek1.string(from: currentDate)
|
||||||
let week2 = formatterWeek2.string(from: currentDate)
|
let week2 = formatterWeek2.string(from: currentDate)
|
||||||
results.append(.init(keyValue: .init(key: key, value: week1), score: -98))
|
results.append(.init(value: week1, score: -98))
|
||||||
results.append(.init(keyValue: .init(key: key, value: week2), score: -99))
|
results.append(.init(value: week2, score: -99))
|
||||||
default: return .init()
|
default: return .init()
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -115,18 +115,6 @@ extension vChewing {
|
||||||
IME.prtDebugIntel(strDump)
|
IME.prtDebugIntel(strDump)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成雙元圖陣列。
|
|
||||||
///
|
|
||||||
/// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。
|
|
||||||
/// - parameters:
|
|
||||||
/// - precedingKey: 前述讀音索引鍵。
|
|
||||||
/// - key: 當前讀音索引鍵。
|
|
||||||
public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
|
||||||
// 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。
|
|
||||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
|
||||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。
|
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的字串首尾範圍資料、據此自 strData 取得字串形式的資料、生成單元圖陣列。
|
||||||
/// - parameters:
|
/// - parameters:
|
||||||
/// - key: 讀音索引鍵。
|
/// - key: 讀音索引鍵。
|
||||||
|
@ -136,7 +124,6 @@ extension vChewing {
|
||||||
for netaRange in arrRangeRecords {
|
for netaRange in arrRangeRecords {
|
||||||
let neta = strData[netaRange].split(separator: " ")
|
let neta = strData[netaRange].split(separator: " ")
|
||||||
let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1])
|
let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1])
|
||||||
let kvPair = Megrez.KeyValuePaired(key: key, value: theValue)
|
|
||||||
var theScore = defaultScore
|
var theScore = defaultScore
|
||||||
if neta.count >= 3, !shouldForceDefaultScore, !neta[2].contains("#") {
|
if neta.count >= 3, !shouldForceDefaultScore, !neta[2].contains("#") {
|
||||||
theScore = .init(String(neta[2])) ?? defaultScore
|
theScore = .init(String(neta[2])) ?? defaultScore
|
||||||
|
@ -144,7 +131,7 @@ extension vChewing {
|
||||||
if theScore > 0 {
|
if theScore > 0 {
|
||||||
theScore *= -1 // 應對可能忘記寫負號的情形
|
theScore *= -1 // 應對可能忘記寫負號的情形
|
||||||
}
|
}
|
||||||
grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore))
|
grams.append(Megrez.Unigram(value: theValue, score: theScore))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return grams
|
return grams
|
||||||
|
|
|
@ -108,18 +108,6 @@ extension vChewing {
|
||||||
IME.prtDebugIntel(strDump)
|
IME.prtDebugIntel(strDump)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 【該功能無法使用】根據給定的前述讀音索引鍵與當前讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成雙元圖陣列。
|
|
||||||
///
|
|
||||||
/// 威注音輸入法尚未引入雙元圖支援,所以該函式並未擴充相關功能,自然不會起作用。
|
|
||||||
/// - parameters:
|
|
||||||
/// - precedingKey: 前述讀音索引鍵。
|
|
||||||
/// - key: 當前讀音索引鍵。
|
|
||||||
public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
|
||||||
// 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。
|
|
||||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
|
||||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。
|
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。
|
||||||
/// - parameters:
|
/// - parameters:
|
||||||
/// - key: 讀音索引鍵。
|
/// - key: 讀音索引鍵。
|
||||||
|
@ -130,7 +118,6 @@ extension vChewing {
|
||||||
let strNetaSet = String(decoding: netaSet, as: UTF8.self)
|
let strNetaSet = String(decoding: netaSet, as: UTF8.self)
|
||||||
let neta = Array(strNetaSet.split(separator: " ").reversed())
|
let neta = Array(strNetaSet.split(separator: " ").reversed())
|
||||||
let theValue: String = .init(neta[0])
|
let theValue: String = .init(neta[0])
|
||||||
let kvPair = Megrez.KeyValuePaired(key: key, value: theValue)
|
|
||||||
var theScore = defaultScore
|
var theScore = defaultScore
|
||||||
if neta.count >= 2, !shouldForceDefaultScore {
|
if neta.count >= 2, !shouldForceDefaultScore {
|
||||||
theScore = .init(String(neta[1])) ?? defaultScore
|
theScore = .init(String(neta[1])) ?? defaultScore
|
||||||
|
@ -138,7 +125,7 @@ extension vChewing {
|
||||||
if theScore > 0 {
|
if theScore > 0 {
|
||||||
theScore *= -1 // 應對可能忘記寫負號的情形
|
theScore *= -1 // 應對可能忘記寫負號的情形
|
||||||
}
|
}
|
||||||
grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore))
|
grams.append(Megrez.Unigram(value: theValue, score: theScore))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return grams
|
return grams
|
||||||
|
|
|
@ -27,13 +27,13 @@ extension vChewing {
|
||||||
}
|
}
|
||||||
|
|
||||||
public func observe(
|
public func observe(
|
||||||
walkedAnchors: [Megrez.NodeAnchor],
|
walkedNodes: [Megrez.Compositor.Node],
|
||||||
cursorIndex: Int,
|
cursorIndex: Int,
|
||||||
candidate: String,
|
candidate: String,
|
||||||
timestamp: Double,
|
timestamp: Double,
|
||||||
saveCallback: @escaping () -> Void
|
saveCallback: @escaping () -> Void
|
||||||
) {
|
) {
|
||||||
let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex)
|
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
|
||||||
guard !key.isEmpty else { return }
|
guard !key.isEmpty else { return }
|
||||||
|
|
||||||
guard mutLRUMap[key] != nil else {
|
guard mutLRUMap[key] != nil else {
|
||||||
|
@ -57,7 +57,7 @@ extension vChewing {
|
||||||
// 降低磁碟寫入次數。唯有失憶的情況下才會更新觀察且記憶。
|
// 降低磁碟寫入次數。唯有失憶的情況下才會更新觀察且記憶。
|
||||||
if var theNeta = mutLRUMap[key] {
|
if var theNeta = mutLRUMap[key] {
|
||||||
_ = suggest(
|
_ = suggest(
|
||||||
walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, timestamp: timestamp,
|
walkedNodes: walkedNodes, cursorIndex: cursorIndex, timestamp: timestamp,
|
||||||
decayCallback: {
|
decayCallback: {
|
||||||
theNeta.observation.update(candidate: candidate, timestamp: timestamp)
|
theNeta.observation.update(candidate: candidate, timestamp: timestamp)
|
||||||
self.mutLRUList.insert(theNeta, at: 0)
|
self.mutLRUList.insert(theNeta, at: 0)
|
||||||
|
@ -70,17 +70,17 @@ extension vChewing {
|
||||||
}
|
}
|
||||||
|
|
||||||
public func suggest(
|
public func suggest(
|
||||||
walkedAnchors: [Megrez.NodeAnchor],
|
walkedNodes: [Megrez.Compositor.Node],
|
||||||
cursorIndex: Int,
|
cursorIndex: Int,
|
||||||
timestamp: Double,
|
timestamp: Double,
|
||||||
decayCallback: @escaping () -> Void = {}
|
decayCallback: @escaping () -> Void = {}
|
||||||
) -> [Megrez.Unigram] {
|
) -> [(String, Megrez.Unigram)] {
|
||||||
let key = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex)
|
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
|
||||||
guard !key.isEmpty else {
|
guard !key.isEmpty else {
|
||||||
IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.")
|
IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.")
|
||||||
return .init()
|
return .init()
|
||||||
}
|
}
|
||||||
let currentReadingKey = convertKeyFrom(walkedAnchors: walkedAnchors, cursorIndex: cursorIndex, readingOnly: true)
|
let currentReadingKey = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex, readingOnly: true)
|
||||||
guard let koPair = mutLRUMap[key] else {
|
guard let koPair = mutLRUMap[key] else {
|
||||||
IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).")
|
IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).")
|
||||||
return .init()
|
return .init()
|
||||||
|
@ -88,7 +88,7 @@ extension vChewing {
|
||||||
|
|
||||||
let observation = koPair.observation
|
let observation = koPair.observation
|
||||||
|
|
||||||
var arrResults = [Megrez.Unigram]()
|
var arrResults = [(String, Megrez.Unigram)]()
|
||||||
var currentHighScore = 0.0
|
var currentHighScore = 0.0
|
||||||
for overrideNeta in Array(observation.overrides) {
|
for overrideNeta in Array(observation.overrides) {
|
||||||
let override: Override = overrideNeta.value
|
let override: Override = overrideNeta.value
|
||||||
|
@ -111,10 +111,8 @@ extension vChewing {
|
||||||
)
|
)
|
||||||
if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() }
|
if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() }
|
||||||
|
|
||||||
let newUnigram = Megrez.Unigram(
|
let newUnigram = Megrez.Unigram(value: overrideNeta.key, score: overrideScore)
|
||||||
keyValue: .init(key: currentReadingKey, value: overrideNeta.key), score: overrideScore
|
arrResults.insert((currentReadingKey, newUnigram), at: 0)
|
||||||
)
|
|
||||||
arrResults.insert(newUnigram, at: 0)
|
|
||||||
currentHighScore = overrideScore
|
currentHighScore = overrideScore
|
||||||
}
|
}
|
||||||
if arrResults.isEmpty {
|
if arrResults.isEmpty {
|
||||||
|
@ -137,12 +135,12 @@ extension vChewing {
|
||||||
}
|
}
|
||||||
|
|
||||||
func convertKeyFrom(
|
func convertKeyFrom(
|
||||||
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
|
walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, readingOnly: Bool = false
|
||||||
) -> String {
|
) -> String {
|
||||||
let whiteList = "你他妳她祢衪它牠再在"
|
let whiteList = "你他妳她祢衪它牠再在"
|
||||||
var arrNodes: [Megrez.NodeAnchor] = []
|
var arrNodes: [Megrez.Compositor.Node] = []
|
||||||
var intLength = 0
|
var intLength = 0
|
||||||
for theNodeAnchor in walkedAnchors {
|
for theNodeAnchor in walkedNodes {
|
||||||
arrNodes.append(theNodeAnchor)
|
arrNodes.append(theNodeAnchor)
|
||||||
intLength += theNodeAnchor.spanLength
|
intLength += theNodeAnchor.spanLength
|
||||||
if intLength >= cursorIndex {
|
if intLength >= cursorIndex {
|
||||||
|
@ -154,7 +152,7 @@ extension vChewing {
|
||||||
|
|
||||||
arrNodes = Array(arrNodes.reversed())
|
arrNodes = Array(arrNodes.reversed())
|
||||||
|
|
||||||
let kvCurrent = arrNodes[0].node.currentPair
|
let kvCurrent = arrNodes[0].currentPair
|
||||||
guard !kvCurrent.key.contains("_") else {
|
guard !kvCurrent.key.contains("_") else {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
@ -183,7 +181,7 @@ extension vChewing {
|
||||||
!kvPrevious.key.contains("_"),
|
!kvPrevious.key.contains("_"),
|
||||||
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
||||||
{
|
{
|
||||||
kvPrevious = arrNodes[1].node.currentPair
|
kvPrevious = arrNodes[1].currentPair
|
||||||
readingStack = kvPrevious.key + readingStack
|
readingStack = kvPrevious.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +189,7 @@ extension vChewing {
|
||||||
!kvAnterior.key.contains("_"),
|
!kvAnterior.key.contains("_"),
|
||||||
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
||||||
{
|
{
|
||||||
kvAnterior = arrNodes[2].node.currentPair
|
kvAnterior = arrNodes[2].currentPair
|
||||||
readingStack = kvAnterior.key + readingStack
|
readingStack = kvAnterior.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -184,7 +184,7 @@ enum mgrLangModel {
|
||||||
(mode == InputMode.imeModeCHT)
|
(mode == InputMode.imeModeCHT)
|
||||||
? gLangModelCHT.unigramsFor(key: unigramKey) : gLangModelCHS.unigramsFor(key: unigramKey)
|
? gLangModelCHT.unigramsFor(key: unigramKey) : gLangModelCHS.unigramsFor(key: unigramKey)
|
||||||
for unigram in unigrams {
|
for unigram in unigrams {
|
||||||
if unigram.keyValue.value == userPhrase {
|
if unigram.value == userPhrase {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue