From f8abcd59e45b782c6608c8cf2b41ee25be2e737e Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 6 Mar 2023 21:47:57 +0800 Subject: [PATCH] Megrez // Fix an issue that update() can ruin a span unit object. --- .../Sources/Megrez/1_Compositor.swift | 2 +- .../Sources/Megrez/4_SpanUnit.swift | 19 +++++++++++++++---- .../Tests/MegrezTests/LMDataForTests.swift | 5 ++++- .../Tests/MegrezTests/MegrezTests.swift | 8 ++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift index 863c2c62..7c3c7a21 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift @@ -291,7 +291,7 @@ extension Megrez.Compositor { // 自動銷毀無效的節點。 if unigrams.isEmpty { if theNode.keyArray.count == 1 { continue } - spans[position].nodes.removeAll { $0 == theNode } + spans[position].nullify(node: theNode) } else { theNode.syncingUnigrams(from: unigrams) } diff --git a/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift b/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift index c74d271f..27c3c81c 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift @@ -24,10 +24,7 @@ extension Megrez.Compositor { /// 清除該幅位單元的全部的節點,且重設最長節點長度為 0,然後再在節點陣列內預留空位。 public func clear() { - nodes.removeAll() - for _ in 0 ..< maxSpanLength { - nodes.append(nil) - } + nodes = .init(repeating: nil, count: maxSpanLength) maxLength = 0 } @@ -43,6 +40,18 @@ extension Megrez.Compositor { return true } + /// 丟掉任何與給定節點完全雷同的節點。 + /// - Remark: Swift 不像 C# 那樣有容量鎖定型陣列, + /// 對某個位置的內容的刪除行為都可能會導致其它內容錯位、繼發其它不可知故障。 + /// 於是就提供了這個專門的工具函式。 + /// - Parameter node: 要參照的節點。 + public func nullify(node givenNode: Node) { + nodes.enumerated().forEach { index, theNode in + guard theNode == givenNode else { return } + nodes[index] = nil + } + } + /// 丟掉任何不小於給定幅位長度的節點。 /// - Parameter length: 給定的幅位長度。 /// - Returns: 該操作是否成功執行。 @@ -51,12 +60,14 @@ extension Megrez.Compositor { return false } for i in length ... maxSpanLength { + guard (0 ..< nodes.count).contains(i - 1) else { continue } // 防呆 nodes[i - 1] = nil } maxLength = 0 guard length > 1 else { return false } let maxR = length - 2 for i in 0 ... maxR { + guard (0 ..< nodes.count).contains(maxR - i) else { continue } // 防呆 if nodes[maxR - i] == nil { continue } maxLength = maxR - i + 1 break diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift index 6f9445a9..61a1386a 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift @@ -40,7 +40,10 @@ class SimpleLM: LangModelProtocol { func trim(key: String, value: String) { guard var arr = mutDatabase[key] else { return } arr = arr.compactMap { $0.value == value ? nil : $0 } - guard !arr.isEmpty else { return } + guard !arr.isEmpty else { + mutDatabase[key] = nil + return + } mutDatabase[key] = arr } } diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift index 27daf0c3..44e4c6b2 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift @@ -537,5 +537,13 @@ final class MegrezTests: XCTestCase { let newResult = compositor.walk().0.values.joined() print(newResult) XCTAssertEqual([oldResult, newResult], ["年中獎金", "年終獎金"]) + compositor.cursor = 4 + compositor.dropKey(direction: .rear) + compositor.dropKey(direction: .rear) + theLM.trim(key: "nian2zhong1", value: "年終") + compositor.update(updateExisting: true) + let newResult2 = compositor.walk().0.values + print(newResult2) + XCTAssertEqual(newResult2, ["年", "中"]) } }