From 32e462af43f935f97224946f613f0d4cd6528da9 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 24 Oct 2022 12:46:06 +0800 Subject: [PATCH] Megrez // Let insertKey() return false if no new nodes added. --- .../Sources/Megrez/1_Compositor.swift | 19 +++++++++++++++---- .../Sources/Megrez/4_Span.swift | 8 ++++---- .../Tests/MegrezTests/MegrezTests.swift | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift index 6e1175c8..e0168ccd 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift @@ -73,8 +73,14 @@ extension Megrez { @discardableResult public mutating func insertKey(_ key: String) -> Bool { guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(key: key) else { return false } keys.insert(key, at: cursor) + let gridBackup = spans resizeGrid(at: cursor, do: .expand) - update() + let nodesInserted = update() + // 用來在 langModel.hasUnigramsFor() 結果不準確的時候防呆、恢復被搞壞的 spans。 + if nodesInserted == 0 { + spans = gridBackup + return false + } cursor += 1 // 游標必須得在執行 update() 之後才可以變動。 return true } @@ -219,7 +225,7 @@ extension Megrez.Compositor { /// (XXXXXXX? <-被砍爛的節點 /// ``` /// - Parameter location: 給定的幅位座標。 - func dropWreckedNodes(at location: Int) { + mutating func dropWreckedNodes(at location: Int) { let location = max(min(location, spans.count), 0) // 防呆 guard !spans.isEmpty else { return } let affectedLength = Megrez.Compositor.maxSpanLength - 1 @@ -230,7 +236,7 @@ extension Megrez.Compositor { } } - @discardableResult func insertNode(_ node: Node, at location: Int) -> Bool { + @discardableResult mutating func insertNode(_ node: Node, at location: Int) -> Bool { let location = max(min(location, spans.count - 1), 0) // 防呆 spans[location].append(node: node) return true @@ -254,9 +260,12 @@ extension Megrez.Compositor { return key == node.key } - func update() { + /// 根據當前狀況更新整個組字器的節點文脈。 + /// - Returns: 新增了多少節點。 + @discardableResult mutating func update() -> Int { let maxSpanLength = Megrez.Compositor.maxSpanLength let range = max(0, cursor - maxSpanLength).. Bool { + @discardableResult public mutating func append(node: Node) -> Bool { guard (1...maxSpanLength).contains(node.spanLength) else { return false } @@ -36,7 +36,7 @@ extension Megrez.Compositor { /// 丟掉任何不小於給定幅位長度的節點。 /// - Parameter length: 給定的幅位長度。 /// - Returns: 該操作是否成功執行。 - @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { + @discardableResult public mutating func dropNodesOfOrBeyond(length: Int) -> Bool { guard (1...maxSpanLength).contains(length) else { return false } diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift index 4e0a2255..484a62cc 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift @@ -11,7 +11,7 @@ import XCTest final class MegrezTests: XCTestCase { func testSpan() throws { let langModel = SimpleLM(input: strSampleData) - let span = Megrez.Compositor.Span() + var span = Megrez.Compositor.Span() let n1 = Megrez.Compositor.Node(keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(key: "gao1")) let n3 = Megrez.Compositor.Node( keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(key: "gao1ke1ji4")