diff --git a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift index e0168ccd..c1e6501a 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift @@ -172,7 +172,7 @@ extension Megrez { } } -// MARK: - Internal Methods +// MARK: - Internal Methods (Maybe Public) extension Megrez.Compositor { // MARK: Internal methods for maintaining the grid. @@ -242,45 +242,51 @@ extension Megrez.Compositor { return true } - func getJointKey(range: Range) -> String { - // 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。 - guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return "" } - return keys[range].joined(separator: separator) - } - func getJointKeyArray(range: Range) -> [String] { // 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。 guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] } return keys[range].map { String($0) } } - func hasNode(at location: Int, length: Int, key: String) -> Bool { + func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? { let location = max(min(location, spans.count), 0) // 防呆 - guard let node = spans[location].nodeOf(length: length) else { return false } - return key == node.key + guard let node = spans[location].nodeOf(length: length) else { return nil } + return keyArray == node.keyArray ? node : nil } /// 根據當前狀況更新整個組字器的節點文脈。 - /// - Returns: 新增了多少節點。 - @discardableResult mutating func update() -> Int { + /// - Returns: 新增了多少節點。如果返回「0」則表示可能發生了錯誤。 + @discardableResult public mutating func update(updateExisting: Bool = false) -> Int { let maxSpanLength = Megrez.Compositor.maxSpanLength let range = max(0, cursor - maxSpanLength).. Bool { + @discardableResult public func append(node: Node) -> Bool { guard (1...maxSpanLength).contains(node.spanLength) else { return false } @@ -36,7 +36,7 @@ extension Megrez.Compositor { /// 丟掉任何不小於給定幅位長度的節點。 /// - Parameter length: 給定的幅位長度。 /// - Returns: 該操作是否成功執行。 - @discardableResult public mutating func dropNodesOfOrBeyond(length: Int) -> Bool { + @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { guard (1...maxSpanLength).contains(length) else { return false } @@ -66,7 +66,7 @@ extension Megrez.Compositor { /// 找出所有與該位置重疊的節點。其返回值為一個節錨陣列(包含節點、以及其起始位置)。 /// - Parameter location: 游標位置。 /// - Returns: 一個包含所有與該位置重疊的節點的陣列。 - func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] { + internal func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] { var results = [NodeAnchor]() guard !spans.isEmpty, location < spans.count else { return results } diff --git a/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift b/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift index b046e9f4..78fa31ea 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift @@ -39,7 +39,7 @@ extension Megrez.Compositor { public private(set) var spanLength: Int public private(set) var unigrams: [Megrez.Unigram] public private(set) var currentUnigramIndex: Int = 0 { - didSet { currentUnigramIndex = min(max(0, currentUnigramIndex), unigrams.count - 1) } + didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) } } public var currentPair: Megrez.Compositor.KeyValuePaired { .init(key: key, value: value) } @@ -53,6 +53,18 @@ extension Megrez.Compositor { hasher.combine(overrideType) } + /// 置換掉該節點內的單元圖陣列資料。 + /// 如果此時影響到了 currentUnigramIndex 所指的內容的話,則將其重設為 0。 + /// - Parameter source: 新的單元圖陣列資料,必須不能為空(否則必定崩潰)。 + public func resetUnigrams(using source: [Megrez.Unigram]) { + let oldCurrentValue = unigrams[currentUnigramIndex].value + unigrams = source + // if unigrams.isEmpty { unigrams.append(.init(value: key, score: -114.514)) } // 保險,請按需啟用。 + currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) + let newCurrentValue = unigrams[currentUnigramIndex].value + if oldCurrentValue != newCurrentValue { currentUnigramIndex = 0 } + } + public private(set) var overrideType: Node.OverrideType public static func == (lhs: Node, rhs: Node) -> Bool { diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift index 87ba036b..d89de536 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift @@ -36,6 +36,13 @@ class SimpleLM: LangModelProtocol { func hasUnigramsFor(key: String) -> Bool { mutDatabase.keys.contains(key) } + + func trim(key: String, value: String) { + guard var arr = mutDatabase[key] else { return } + arr = arr.compactMap { $0.value == value ? nil : $0 } + guard !arr.isEmpty else { return } + mutDatabase[key] = arr + } } class MockLM: LangModelProtocol { diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift index 484a62cc..8ec5c7a1 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift @@ -11,7 +11,7 @@ import XCTest final class MegrezTests: XCTestCase { func testSpan() throws { let langModel = SimpleLM(input: strSampleData) - var span = Megrez.Compositor.Span() + let span = Megrez.Compositor.Span() let n1 = Megrez.Compositor.Node(keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(key: "gao1")) let n3 = Megrez.Compositor.Node( keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(key: "gao1ke1ji4") @@ -518,4 +518,21 @@ final class MegrezTests: XCTestCase { result = compositor.walk().0 XCTAssertEqual(result.values, ["高熱", "🔥", "危險"]) } + + func testCompositor_updateUnigramData() throws { + let theLM = SimpleLM(input: strSampleData) + var compositor = Megrez.Compositor(with: theLM) + compositor.separator = "" + compositor.insertKey("nian2") + compositor.insertKey("zhong1") + compositor.insertKey("jiang3") + compositor.insertKey("jin1") + let oldResult = compositor.walk().0.values.joined() + print(oldResult) + theLM.trim(key: "nian2zhong1", value: "年中") + compositor.update(updateExisting: true) + let newResult = compositor.walk().0.values.joined() + print(newResult) + XCTAssertEqual([oldResult, newResult], ["年中獎金", "年終獎金"]) + } }