From 44f22973a1c26906f4b2d5e407fb02c1bc608df2 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 14 Jul 2022 15:08:50 +0800 Subject: [PATCH 1/2] Megrez // v1.2.9 update with bug fixes. --- .../LangModelRelated/LMInstantiator.swift | 4 +-- .../LangModelRelated/SubLMs/lmCoreEX.swift | 2 +- .../LangModelRelated/SubLMs/lmCoreNS.swift | 2 +- .../LanguageParsers/Megrez/1_Compositor.swift | 21 ++++++++---- .../LanguageParsers/Megrez/2_Grid.swift | 34 +++++-------------- .../LanguageParsers/Megrez/3_NodeAnchor.swift | 30 ++++++++-------- .../LanguageParsers/Megrez/4_Node.swift | 12 ++++--- .../Megrez/5_LanguageModel.swift | 4 +-- 8 files changed, 52 insertions(+), 57 deletions(-) diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index d13705f4..69b9c73f 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -185,7 +185,7 @@ extension vChewing { // MARK: - 核心函式(對外) /// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。 - // public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { } + // public func bigramsFor(preceedingKey: String, key: String) -> [Megrez.Bigram] { } /// 給定讀音字串,讓 LMI 給出對應的經過處理的單元圖陣列。 /// - Parameter key: 給定的讀音字串。 @@ -257,7 +257,7 @@ extension vChewing { } /// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。 - public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } + public func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } // MARK: - 核心函式(對內) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 07170818..49978808 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -136,7 +136,7 @@ extension vChewing { /// - parameters: /// - precedingKey: 前述讀音索引鍵 /// - key: 當前讀音索引鍵 - public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { + public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 // 其實只要一句「[Megrez.Bigram]()」就夠了。 precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift index c1f93e7f..da250eee 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreNS.swift @@ -130,7 +130,7 @@ extension vChewing { /// - parameters: /// - precedingKey: 前述讀音索引鍵 /// - key: 當前讀音索引鍵 - public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { + public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { // 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。 // 其實只要一句「[Megrez.Bigram]()」就夠了。 precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() diff --git a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift index 76a8f6f1..c6ca135c 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_Compositor.swift @@ -26,7 +26,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extension Megrez { /// 組字器。 public class Compositor: Grid { - /// 文字輸入方向 + /// 就文字輸入方向而言的方向。 public enum TypingDirection { case front, rear } /// 給被丟掉的節點路徑施加的負權重。 private let kDroppedPathScore: Double = -999 @@ -38,7 +38,14 @@ extension Megrez { private var langModel: LangModelProtocol /// 允許查詢當前游標位置屬於第幾個幅位座標(從 0 開始算)。 private(set) var cursorRegionMap: [Int: Int] = .init() - private(set) var walkedAnchors: [Megrez.NodeAnchor] = [] // 用以記錄爬過的節錨的陣列 + /// 用以記錄爬過的節錨的陣列。 + private(set) var walkedAnchors: [NodeAnchor] = [] + + /// 該函式用以更新爬過的節錨的陣列。 + /// - Parameter nodes: 傳入的節點陣列。 + public func updateWalkedAnchors(with nodes: [Node]) { + walkedAnchors = nodes.map { Megrez.NodeAnchor(node: $0) } + } /// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。 public var joinSeparator: String = "-" @@ -47,7 +54,7 @@ extension Megrez { public var length: Int { readings.count } /// 按幅位來前後移動游標。 - /// - Parameter direction: 移動方向 + /// - Parameter direction: 移動方向。 /// - Returns: 該操作是否順利完成。 @discardableResult public func jumpCursorBySpan(to direction: TypingDirection) -> Bool { switch direction { @@ -88,7 +95,7 @@ extension Megrez { /// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為空。 public init(lm: LangModelProtocol, length: Int = 10, separator: String = "-") { langModel = lm - super.init(spanLength: abs(length)) // 防呆 + super.init(spanLengthLimit: abs(length)) // 防呆 joinSeparator = separator } @@ -181,7 +188,7 @@ extension Megrez { var paths = [[NodeAnchor]]() let nodes = nodesEndingAt(location: location).stableSorted { - $0.scoreForSort > $1.scoreForSort + $0.node.score > $1.node.score } guard !nodes.isEmpty else { return .init() } // 防止下文出現範圍外索引的錯誤 @@ -270,7 +277,7 @@ extension Megrez { if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue } let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading) if unigrams.isEmpty { continue } - let n = Node(key: combinedReading, unigrams: unigrams) + let n: Node = .init(key: combinedReading, spanLength: q, unigrams: unigrams) insertNode(node: n, location: p, spanLength: q) } } @@ -282,6 +289,7 @@ extension Megrez { internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) { var cursorRegionMapDict = [Int: Int]() + cursorRegionMapDict[-1] = 0 // 防呆 var counter = 0 for (i, anchor) in anchors.enumerated() { for _ in 0.. NodeAnchor { let location = abs(location) // 防呆 var node = NodeAnchor() - for theAnchor in nodesOverlappedAt(location: location) { + for theAnchor in nodesCrossingOrEndingAt(location: location) { let candidates = theAnchor.node.candidates // 將該位置的所有節點的候選字詞鎖定狀態全部重設。 theAnchor.node.resetCandidate() @@ -217,7 +199,7 @@ extension Megrez { @discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor { let location = abs(location) // 防呆 var node = NodeAnchor() - for theAnchor in nodesOverlappedAt(location: location) { + for theAnchor in nodesCrossingOrEndingAt(location: location) { let candidates = theAnchor.node.candidates // 將該位置的所有節點的候選字詞鎖定狀態全部重設。 theAnchor.node.resetCandidate() diff --git a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift index 40c9c89d..2ce89823 100644 --- a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift +++ b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift @@ -30,28 +30,35 @@ extension Megrez { public var isEmpty: Bool { node.key.isEmpty } /// 節點。一個節锚內不一定有節點。 public var node: Node = .init() - /// 節锚所在的位置。 - public var location: Int = 0 /// 指定的幅位長度。 - public var spanLength: Int = 0 + public var spanLength: Int { node.spanLength } + /// 獲取用來比較的權重。 + public var scoreForSort: Double { node.score } /// 累計權重。 public var mass: Double = 0.0 - /// 索引鍵的長度。 - public var keyLength: Int { - isEmpty ? node.key.count : 0 + /// 單元圖陣列。 + public var unigrams: [Unigram] { node.unigrams } + /// 雙元圖陣列。 + public var bigrams: [Bigram] { node.bigrams } + /// 鍵。 + public var key: String { node.key } + + /// 初期化一個節錨。 + public init(node: Node = .init(), mass: Double? = nil) { + self.node = node + self.mass = mass ?? self.node.score } + /// 將該節錨雜湊化。 public func hash(into hasher: inout Hasher) { hasher.combine(node) - hasher.combine(location) - hasher.combine(spanLength) hasher.combine(mass) } /// 將當前節锚列印成一個字串。 public var description: String { var stream = "" - stream += "{@(" + String(location) + "," + String(spanLength) + ")," + stream += "{@(" + String(spanLength) + ")," if node.key.isEmpty { stream += node.description } else { @@ -60,11 +67,6 @@ extension Megrez { stream += "}" return stream } - - /// 獲取用來比較的權重。 - public var scoreForSort: Double { - isEmpty ? node.score : 0 - } } } diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift index f5fc0d63..af951a54 100644 --- a/Source/Modules/LanguageParsers/Megrez/4_Node.swift +++ b/Source/Modules/LanguageParsers/Megrez/4_Node.swift @@ -30,7 +30,7 @@ extension Megrez { lhs.key == rhs.key && lhs.score == rhs.score && lhs.unigrams == rhs.unigrams && lhs.bigrams == rhs.bigrams && lhs.candidates == rhs.candidates && lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap && lhs.precedingBigramMap == rhs.precedingBigramMap && lhs.isCandidateFixed == rhs.isCandidateFixed - && lhs.selectedUnigramIndex == rhs.selectedUnigramIndex + && lhs.selectedUnigramIndex == rhs.selectedUnigramIndex && lhs.spanLength == rhs.spanLength } public func hash(into hasher: inout Hasher) { @@ -38,6 +38,7 @@ extension Megrez { hasher.combine(score) hasher.combine(unigrams) hasher.combine(bigrams) + hasher.combine(spanLength) hasher.combine(candidates) hasher.combine(valueUnigramIndexMap) hasher.combine(precedingBigramMap) @@ -50,9 +51,11 @@ extension Megrez { /// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。 private(set) var score: Double = 0 /// 單元圖陣列。 - private var unigrams: [Unigram] + private(set) var unigrams: [Unigram] /// 雙元圖陣列。 - private var bigrams: [Bigram] + private(set) var bigrams: [Bigram] + /// 指定的幅位長度。 + public var spanLength: Int = 0 /// 候選字詞陣列,以鍵值陣列的形式存在。 private(set) var candidates: [KeyValuePaired] = [] /// 專門「用單元圖資料值來調查索引值」的辭典。 @@ -83,10 +86,11 @@ extension Megrez { /// - key: 索引鍵。 /// - unigrams: 單元圖陣列。 /// - bigrams: 雙元圖陣列(非必填)。 - public init(key: String = "", unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) { + public init(key: String = "", spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) { self.key = key self.unigrams = unigrams self.bigrams = bigrams + self.spanLength = spanLength self.unigrams.sort { $0.score > $1.score diff --git a/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift index 75ee404e..c5dda601 100644 --- a/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift +++ b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift @@ -28,7 +28,7 @@ public protocol LangModelProtocol { func unigramsFor(key: String) -> [Megrez.Unigram] /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 - func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] + func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] /// 給定鍵,確認是否有單元圖記錄在庫。 func hasUnigramsFor(key: String) -> Bool @@ -47,7 +47,7 @@ extension Megrez { } /// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。 - open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { + open func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() } From 8af2350eabf890134f4b659430853cbe9acaf44d Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 14 Jul 2022 15:23:24 +0800 Subject: [PATCH 2/2] KeyHandler // Use spanLength for stableSort(). --- Source/Modules/ControllerModules/KeyHandler_Core.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 50af2fc9..93217d84 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -229,7 +229,7 @@ class KeyHandler { if arrAnchors.isEmpty { return .init() } // 讓更長的節錨排序靠前。 - arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength } + arrAnchors = arrAnchors.stableSort { $0.spanLength > $1.spanLength } // 將節錨內的候選字詞資料拓印到輸出陣列內。 for currentCandidate in arrAnchors.map(\.node.candidates).joined() {