From 490a646f8862db1ccdb0c1e11f55b28a2a8aec39 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 9 Mar 2023 22:28:06 +0800 Subject: [PATCH] Megrez // Compositor refactoration with hard copy support. --- .../Sources/Megrez/1_Compositor.swift | 88 +++++---- .../Sources/Megrez/2_Walker.swift | 33 ++-- .../Sources/Megrez/3_KeyValuePaired.swift | 50 +++-- .../Sources/Megrez/4_SpanUnit.swift | 134 +++++++------- .../Sources/Megrez/5_Vertex.swift | 6 +- .../Sources/Megrez/6_Node.swift | 42 +++-- .../Sources/Megrez/8_Unigram.swift | 4 +- .../Tests/MegrezTests/LMDataForTests.swift | 4 +- .../Tests/MegrezTests/MegrezTests.swift | 173 ++++++++++-------- 9 files changed, 291 insertions(+), 243 deletions(-) diff --git a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift index 7c3c7a21..73bc5469 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift @@ -66,6 +66,26 @@ public extension Megrez { self.separator = separator } + /// 以指定組字器生成拷貝。 + /// - Remark: 因為 Node 不是 Struct,所以會在 Compositor 被拷貝的時候無法被真實複製。 + /// 這樣一來,Compositor 複製品當中的 Node 的變化會被反應到原先的 Compositor 身上。 + /// 這在某些情況下會造成意料之外的混亂情況,所以需要引入一個拷貝用的建構子。 + public init(from target: Compositor) { + cursor = target.cursor + marker = target.marker + separator = target.separator + walkedNodes = target.walkedNodes.map(\.copy) + keys = target.keys + spans = target.spans.map(\.hardCopy) + langModel = target.langModel + } + + /// 該組字器的硬拷貝。 + /// - Remark: 因為 Node 不是 Struct,所以會在 Compositor 被拷貝的時候無法被真實複製。 + /// 這樣一來,Compositor 複製品當中的 Node 的變化會被反應到原先的 Compositor 身上。 + /// 這在某些情況下會造成意料之外的混亂情況,所以需要引入一個拷貝用的建構子。 + public var hardCopy: Compositor { .init(from: self) } + /// 重置包括游標在內的各項參數,且清空各種由組字器生成的內部資料。 /// /// 將已經被插入的索引鍵陣列與幅位單元陣列(包括其內的節點)全部清空。 @@ -167,21 +187,19 @@ public extension Megrez { public var dumpDOT: String { // C# StringBuilder 與 Swift NSMutableString 能提供爆發性的效能。 let strOutput: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n") - for (p, span) in spans.enumerated() { - for ni in 0 ... (span.maxLength) { - guard let np = span.nodeOf(length: ni) else { continue } - if p == 0 { - strOutput.append("BOS -> \(np.value);\n") - } + spans.enumerated().forEach { p, span in + (0 ... span.maxLength).forEach { ni in + guard let np = span[ni] else { return } + if p == 0 { strOutput.append("BOS -> \(np.value);\n") } strOutput.append("\(np.value);\n") if (p + ni) < spans.count { let destinationSpan = spans[p + ni] - for q in 0 ... (destinationSpan.maxLength) { - guard let dn = destinationSpan.nodeOf(length: q) else { continue } + (0 ... destinationSpan.maxLength).forEach { q in + guard let dn = destinationSpan[q] else { return } strOutput.append(np.value + " -> " + dn.value + ";\n") } } - guard (p + ni) == spans.count else { continue } + guard (p + ni) == spans.count else { return } strOutput.append(np.value + " -> EOS;\n") } } @@ -198,11 +216,11 @@ extension Megrez.Compositor { /// - Parameters: /// - location: 給定的幅位座標。 /// - action: 指定是擴張還是縮減一個幅位。 - mutating func resizeGrid(at location: Int, do action: ResizeBehavior) { + private mutating func resizeGrid(at location: Int, do action: ResizeBehavior) { let location = max(min(location, spans.count), 0) // 防呆 switch action { case .expand: - spans.insert(SpanUnit(), at: location) + spans.insert(.init(), at: location) if [0, spans.count].contains(location) { return } case .shrink: if spans.count == location { return } @@ -248,60 +266,54 @@ extension Megrez.Compositor { let affectedLength = Megrez.Compositor.maxSpanLength - 1 let begin = max(0, location - affectedLength) guard location >= begin else { return } - for i in begin ..< location { - spans[i].dropNodesOfOrBeyond(length: location - i + 1) + (begin ..< location).forEach { delta in + ((location - delta + 1) ... Self.maxSpanLength).forEach { theLength in + spans[delta][theLength] = nil + } } } /// 自索引鍵陣列獲取指定範圍的資料。 /// - Parameter range: 指定範圍。 /// - Returns: 拿到的資料。 - func getJoinedKeyArray(range: Range) -> [String] { + private func getJoinedKeyArray(range: Range) -> [String] { // 下面這句不能用 contains,不然會要求至少 macOS 13 Ventura。 guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] } return keys[range].map(\.description) } - /// 在指定位置(以指定索引鍵陣列和指定幅位長度)拿取節點。 - /// - Parameters: - /// - location: 指定游標位置。 - /// - length: 指定幅位長度。 - /// - keyArray: 指定索引鍵陣列。 - /// - Returns: 拿取的節點。拿不到的話就會是 nil。 - func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? { - let location = max(min(location, spans.count - 1), 0) // 防呆 - guard let node = spans[location].nodeOf(length: length) else { return nil } - return keyArray == node.keyArray ? node : nil - } - /// 根據當前狀況更新整個組字器的節點文脈。 /// - Parameter updateExisting: 是否根據目前的語言模型的資料狀態來對既有節點更新其內部的單元圖陣列資料。 /// 該特性可以用於「在選字窗內屏蔽了某個詞之後,立刻生效」這樣的軟體功能需求的實現。 /// - Returns: 新增或影響了多少個節點。如果返回「0」則表示可能發生了錯誤。 @discardableResult public mutating func update(updateExisting: Bool = false) -> Int { let maxSpanLength = Megrez.Compositor.maxSpanLength - let range = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count) + let rangeOfPositions = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count) var nodesChanged = 0 - for position in range { - for theLength in 1 ... min(maxSpanLength, range.upperBound - position) { - let joinedKeyArray = getJoinedKeyArray(range: position ..< (position + theLength)) - if let theNode = getNode(at: position, length: theLength, keyArray: joinedKeyArray) { - if !updateExisting { continue } + rangeOfPositions.forEach { position in + let rangeOfLengths = 1 ... min(maxSpanLength, rangeOfPositions.upperBound - position) + rangeOfLengths.forEach { theLength in + guard position + theLength <= keys.count, position >= 0 else { return } + let joinedKeyArray = keys[position ..< (position + theLength)].map(\.description) + + if let theNode = spans[position][theLength] { + if !updateExisting { return } let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray) // 自動銷毀無效的節點。 if unigrams.isEmpty { - if theNode.keyArray.count == 1 { continue } - spans[position].nullify(node: theNode) + if theNode.keyArray.count == 1 { return } + spans[position][theNode.spanLength] = nil } else { theNode.syncingUnigrams(from: unigrams) } nodesChanged += 1 - continue + return } let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray) - guard !unigrams.isEmpty else { continue } - spans[position].append( - node: .init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams) + guard !unigrams.isEmpty else { return } + // 這裡原本用 SpanUnit.addNode 來完成的,但直接當作辭典來互動的話也沒差。 + spans[position][theLength] = .init( + keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams ) nodesChanged += 1 } diff --git a/Packages/vChewing_Megrez/Sources/Megrez/2_Walker.swift b/Packages/vChewing_Megrez/Sources/Megrez/2_Walker.swift index 9967e351..0e795924 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/2_Walker.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/2_Walker.swift @@ -13,37 +13,34 @@ public extension Megrez.Compositor { /// 對於 `G = (V, E)`,該算法的運行次數為 `O(|V|+|E|)`,其中 `G` 是一個有向無環圖。 /// 這意味著,即使軌格很大,也可以用很少的算力就可以爬軌。 /// - Returns: 爬軌結果+該過程是否順利執行。 - @discardableResult mutating func walk() -> (walkedNode: [Node], succeeded: Bool) { - var result = [Node]() + @discardableResult mutating func walk() -> (walkedNodes: [Megrez.Node], succeeded: Bool) { + var result = [Megrez.Node]() defer { walkedNodes = result } guard !spans.isEmpty else { return (result, true) } var vertexSpans = [[Vertex]]() - for _ in spans { + spans.forEach { _ in vertexSpans.append(.init()) } - for (i, span) in spans.enumerated() { - for j in 1 ... max(span.maxLength, 1) { - if let theNode = span.nodeOf(length: j) { - vertexSpans[i].append(.init(node: theNode)) - } + spans.enumerated().forEach { i, span in + (1 ... max(span.maxLength, 1)).forEach { j in + guard let theNode = span[j] else { return } + vertexSpans[i].append(.init(node: theNode)) } } let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"])) var root = Vertex(node: .init(keyArray: ["_ROOT_"])) - for (i, vertexSpan) in vertexSpans.enumerated() { - for vertex in vertexSpan { + vertexSpans.enumerated().forEach { i, vertexSpan in + vertexSpan.forEach { vertex in let nextVertexPosition = i + vertex.node.spanLength if nextVertexPosition == vertexSpans.count { vertex.edges.append(terminal) - continue - } - for nextVertex in vertexSpans[nextVertexPosition] { - vertex.edges.append(nextVertex) + return } + vertexSpans[nextVertexPosition].forEach { vertex.edges.append($0) } } } @@ -51,15 +48,13 @@ public extension Megrez.Compositor { root.edges.append(contentsOf: vertexSpans[0]) var ordered = topologicalSort(root: &root) - for (j, neta) in ordered.reversed().enumerated() { - for (k, _) in neta.edges.enumerated() { - relax(u: neta, v: &neta.edges[k]) - } + ordered.reversed().enumerated().forEach { j, neta in + neta.edges.indices.forEach { relax(u: neta, v: &neta.edges[$0]) } ordered[j] = neta } var iterated = terminal - var walked = [Node]() + var walked = [Megrez.Node]() var totalLengthOfKeys = 0 while let itPrev = iterated.prev { diff --git a/Packages/vChewing_Megrez/Sources/Megrez/3_KeyValuePaired.swift b/Packages/vChewing_Megrez/Sources/Megrez/3_KeyValuePaired.swift index 609179a6..983cd079 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/3_KeyValuePaired.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/3_KeyValuePaired.swift @@ -5,7 +5,7 @@ import Foundation -public extension Megrez.Compositor { +public extension Megrez { /// 鍵值配對,乃索引鍵陣列與讀音的配對單元。 struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible { /// 索引鍵陣列。一般情況下用來放置讀音等可以用來作為索引的內容。 @@ -18,6 +18,8 @@ public extension Megrez.Compositor { public var isValid: Bool { !keyArray.joined().isEmpty && !value.isEmpty } /// 將當前鍵值列印成一個字串,但如果該鍵值配對為空的話則僅列印「()」。 public var toNGramKey: String { !isValid ? "()" : "(" + joinedKey() + "," + value + ")" } + /// 通用陣列表達形式。 + public var tupletExpression: (keyArray: [String], value: String) { (keyArray, value) } /// 初期化一組鍵值配對。 /// - Parameters: @@ -28,6 +30,13 @@ public extension Megrez.Compositor { self.value = value.isEmpty ? "N/A" : value } + /// 初期化一組鍵值配對。 + /// - Parameter tupletExpression: 傳入的通用陣列表達形式。 + public init(_ tupletExpression: (keyArray: [String], value: String)) { + keyArray = tupletExpression.keyArray.isEmpty ? ["N/A"] : tupletExpression.keyArray + value = tupletExpression.value.isEmpty ? "N/A" : tupletExpression.value + } + /// 初期化一組鍵值配對。 /// - Parameters: /// - key: 索引鍵。一般情況下用來放置讀音等可以用來作為索引的內容。 @@ -72,7 +81,9 @@ public extension Megrez.Compositor { || (lhs.keyArray.count == rhs.keyArray.count && lhs.value >= rhs.value) } } +} +public extension Megrez.Compositor { /// 規定候選字陣列內容的獲取範圍類型: /// - all: 不只包含其它兩類結果,還允許游標穿插候選字。 /// - beginAt: 僅獲取從當前游標位置開始的節點內的候選字。 @@ -84,8 +95,8 @@ public extension Megrez.Compositor { /// 話,那麼這裡會用到 location - 1、以免去在呼叫該函式後再處理的麻煩。 /// - Parameter location: 游標位置。 /// - Returns: 候選字音配對陣列。 - func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [KeyValuePaired] { - var result = [KeyValuePaired]() + func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Megrez.KeyValuePaired] { + var result = [Megrez.KeyValuePaired]() guard !keys.isEmpty else { return result } let location = max(min(location, keys.count - 1), 0) // 防呆 let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted { @@ -93,17 +104,16 @@ public extension Megrez.Compositor { $0.spanLength > $1.spanLength } let keyAtCursor = keys[location] - for theNode in anchors.map(\.node) { - if theNode.keyArray.isEmpty { continue } - for gram in theNode.unigrams { + anchors.map(\.node).filter(\.keyArray.isEmpty.negative).forEach { theNode in + theNode.unigrams.forEach { gram in switch filter { case .all: - // 得加上這道篩選,所以會出現很多無效結果。 - if !theNode.keyArray.contains(keyAtCursor) { continue } + // 得加上這道篩選,不然會出現很多無效結果。 + if !theNode.keyArray.contains(keyAtCursor) { return } case .beginAt: - if theNode.keyArray[0] != keyAtCursor { continue } + if theNode.keyArray[0] != keyAtCursor { return } case .endAt: - if theNode.keyArray.reversed()[0] != keyAtCursor { continue } + if theNode.keyArray.reversed()[0] != keyAtCursor { return } } result.append(.init(keyArray: theNode.keyArray, value: gram.value)) } @@ -120,7 +130,7 @@ public extension Megrez.Compositor { /// - overrideType: 指定覆寫行為。 /// - Returns: 該操作是否成功執行。 @discardableResult func overrideCandidate( - _ candidate: KeyValuePaired, at location: Int, overrideType: Node.OverrideType = .withHighScore + _ candidate: Megrez.KeyValuePaired, at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore ) -> Bool { @@ -137,7 +147,7 @@ public extension Megrez.Compositor { /// - Returns: 該操作是否成功執行。 @discardableResult func overrideCandidateLiteral( _ candidate: String, - at location: Int, overrideType: Node.OverrideType = .withHighScore + at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore ) -> Bool { overrideCandidateAgainst(keyArray: nil, at: location, value: candidate, type: overrideType) } @@ -151,7 +161,7 @@ public extension Megrez.Compositor { /// - value: 資料值。 /// - type: 指定覆寫行為。 /// - Returns: 該操作是否成功執行。 - internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Node.OverrideType) + internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Megrez.Node.OverrideType) -> Bool { let location = max(min(location, keys.count), 0) // 防呆 @@ -166,18 +176,18 @@ public extension Megrez.Compositor { guard let overridden = overridden else { return false } // 啥也不覆寫。 - for i in overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength) { + (overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength)).forEach { i in /// 咱們還得弱化所有在相同的幅位座標的節點的複寫權重。舉例說之前爬軌的結果是「A BC」 /// 且 A 與 BC 都是被覆寫的結果,然後使用者現在在與 A 相同的幅位座標位置 /// 選了「DEF」,那麼 BC 的覆寫狀態就有必要重設(但 A 不用重設)。 arrOverlappedNodes = fetchOverlappingNodes(at: i) - for anchor in arrOverlappedNodes { - if anchor.node == overridden.node { continue } + arrOverlappedNodes.forEach { anchor in + if anchor.node == overridden.node { return } if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t")) || !overridden.node.value.contains(anchor.node.value) { anchor.node.reset() - continue + return } anchor.node.overridingScore /= 4 } @@ -208,3 +218,9 @@ private extension Sequence { .map(\.element) } } + +// MARK: - Bool Extension (Private) + +extension Bool { + var negative: Bool { !self } +} diff --git a/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift b/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift index aa355a88..7f46810f 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/4_SpanUnit.swift @@ -3,90 +3,84 @@ // ==================== // This code is released under the MIT license (SPDX-License-Identifier: MIT) -extension Megrez.Compositor { - /// 幅位單元乃指一組共享起點的節點。 - public class SpanUnit { - /// 節點陣列。每個位置上的節點可能是 nil。 - public var nodes: [Int: Node] = [:] - /// 該幅位單元內的所有節點當中持有最長幅位的節點長度。 - /// 該變數受該幅位的自身操作函式而被動更新。 - public var maxLength: Int { nodes.keys.max() ?? 0 } +public extension Megrez { + /// 幅位乃指一組共享起點的節點。其實是個辭典:[幅位長度: 節點]。 + typealias SpanUnit = [Int: Node] +} - /// (該變數為捷徑,代傳 Megrez.Compositor.maxSpanLength。) - private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } - /// 該幅位單元內的節點的幅位長度上限。 - private var allowedLengths: ClosedRange { 1 ... maxSpanLength } - - /// 幅位乃指一組共享起點的節點。 - public init() { - clear() - } - - /// 清除該幅位單元的全部的節點,且重設最長節點長度為 0,然後再在節點陣列內預留空位。 - public func clear() { - nodes.removeAll() - } - - /// 往該幅位塞入一個節點。 - /// - Parameter node: 要塞入的節點。 - /// - Returns: 該操作是否成功執行。 - @discardableResult public func append(node: Node) -> Bool { - guard allowedLengths.contains(node.spanLength) else { return false } - nodes[node.spanLength] = node - return true - } - - /// 丟掉任何與給定節點完全雷同的節點。 - /// - Remark: Swift 不像 C# 那樣有容量鎖定型陣列, - /// 對某個位置的內容的刪除行為都可能會導致其它內容錯位、繼發其它不可知故障。 - /// 於是就提供了這個專門的工具函式。 - /// - Parameter node: 要參照的節點。 - public func nullify(node givenNode: Node) { - let spanLength = givenNode.spanLength - nodes[spanLength] = nil - } - - /// 丟掉任何不小於給定幅位長度的節點。 - /// - Parameter length: 給定的幅位長度。 - /// - Returns: 該操作是否成功執行。 - @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { - guard allowedLengths.contains(length) else { return false } - let length = min(length, maxSpanLength) - (length ... maxSpanLength).forEach { nodes[$0] = nil } - return true - } - - /// 以給定的幅位長度,在當前幅位單元內找出對應的節點。 - /// - Parameter length: 給定的幅位長度。 - /// - Returns: 查詢結果。 - public func nodeOf(length: Int) -> Node? { - guard allowedLengths.contains(length) else { return nil } - return nodes[length] +public extension Megrez.SpanUnit { + /// 幅位乃指一組共享起點的節點。其實是個辭典:[幅位長度: 節點]。 + /// - Remark: 因為 Node 不是 Struct,所以會在 Compositor 被拷貝的時候無法被真實複製。 + /// 這樣一來,Compositor 複製品當中的 Node 的變化會被反應到原先的 Compositor 身上。 + /// 這在某些情況下會造成意料之外的混亂情況,所以需要引入一個拷貝用的建構子。 + init(SpanUnit target: Megrez.SpanUnit) { + self.init() + target.forEach { theKey, theValue in + self[theKey] = theValue.copy } } - // MARK: Internal implementations. + /// 該幅位的硬拷貝。 + var hardCopy: Megrez.SpanUnit { .init(SpanUnit: self) } + // MARK: - Dynamic Variables + + /// 該幅位單元內的所有節點當中持有最長幅位的節點長度。 + /// 該變數受該幅位的自身操作函式而被動更新。 + var maxLength: Int { keys.max() ?? 0 } + + /// (該變數為捷徑,代傳 Megrez.Compositor.maxSpanLength。) + private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } + /// 該幅位單元內的節點的幅位長度上限。 + private var allowedLengths: ClosedRange { 1 ... maxSpanLength } + + // MARK: - Functions + + /// 往該幅位塞入一個節點。 + /// - Remark: 這個函式用來防呆。一般情況下用不到。 + /// - Parameter node: 要塞入的節點。 + /// - Returns: 該操作是否成功執行。 + @discardableResult mutating func addNode(node: Megrez.Node) -> Bool { + guard allowedLengths.contains(node.spanLength) else { return false } + self[node.spanLength] = node + return true + } + + /// 丟掉任何不小於給定幅位長度的節點。 + /// - Remark: 這個函式用來防呆。一般情況下用不到。 + /// - Parameter length: 給定的幅位長度。 + /// - Returns: 該操作是否成功執行。 + @discardableResult mutating func dropNodesOfOrBeyond(length: Int) -> Bool { + guard allowedLengths.contains(length) else { return false } + let length = Swift.min(length, maxSpanLength) + (length ... maxSpanLength).forEach { self[$0] = nil } + return true + } +} + +// MARK: - Related Compositor Implementations. + +extension Megrez.Compositor { /// 找出所有與該位置重疊的節點。其返回值為一個節錨陣列(包含節點、以及其起始位置)。 /// - Parameter location: 游標位置。 /// - Returns: 一個包含所有與該位置重疊的節點的陣列。 - internal func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] { + func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] { var results = [NodeAnchor]() - guard !spans.isEmpty, location < spans.count else { return results } + guard !spans.isEmpty, givenLocation < spans.count else { return results } // 先獲取該位置的所有單字節點。 - for theLocation in 1 ... spans[location].maxLength { - guard let node = spans[location].nodeOf(length: theLocation) else { continue } - results.append(.init(node: node, spanIndex: location)) + (1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in + guard let node = spans[givenLocation][theSpanLength] else { return } + results.append(.init(node: node, spanIndex: givenLocation)) } // 再獲取以當前位置結尾或開頭的節點。 - let begin: Int = location - min(location, Megrez.Compositor.maxSpanLength - 1) - for theLocation in begin ..< location { - let (A, B): (Int, Int) = (location - theLocation + 1, spans[theLocation].maxLength) - guard A <= B else { continue } - for theLength in A ... B { - guard let node = spans[theLocation].nodeOf(length: theLength) else { continue } + let begin: Int = givenLocation - min(givenLocation, Megrez.Compositor.maxSpanLength - 1) + (begin ..< givenLocation).forEach { theLocation in + let (A, B): (Int, Int) = (givenLocation - theLocation + 1, spans[theLocation].maxLength) + guard A <= B else { return } + (A ... B).forEach { theLength in + guard let node = spans[theLocation][theLength] else { return } results.append(.init(node: node, spanIndex: theLocation)) } } diff --git a/Packages/vChewing_Megrez/Sources/Megrez/5_Vertex.swift b/Packages/vChewing_Megrez/Sources/Megrez/5_Vertex.swift index 6072d337..a101d65b 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/5_Vertex.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/5_Vertex.swift @@ -20,13 +20,13 @@ extension Megrez.Compositor { /// 在進行進行位相幾何排序時會用到的狀態標記。 public var topologicallySorted = false /// 字詞節點。 - public var node: Node + public var node: Megrez.Node /// 初期化一個「有向無環圖的」的頂點單位。 /// /// 這是一個可變的數據結構,用於有向無環圖的構建和單源最短路徑的計算。 /// - Parameter node: 字詞節點。 - public init(node: Node) { + public init(node: Megrez.Node) { self.node = node } @@ -65,7 +65,7 @@ extension Megrez.Compositor { /// 這樣我們就不會受到當前線程的堆棧大小的限制。以下是等價的原始算法。 /// ``` /// func topologicalSort(vertex: Vertex) { - /// for vertexNode in vertex.edges { + /// vertex.edges.forEach {vertexNode in /// if !vertexNode.topologicallySorted { /// dfs(vertexNode, result) /// vertexNode.topologicallySorted = true diff --git a/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift b/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift index 47f5a4cd..a502adc0 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift @@ -5,7 +5,7 @@ import Foundation -public extension Megrez.Compositor { +public extension Megrez { /// 字詞節點。 /// /// 一個節點由這些內容組成:幅位長度、索引鍵、以及一組單元圖。幅位長度就是指這個 @@ -38,8 +38,6 @@ public extension Megrez.Compositor { /// 數(比如野獸常數),以讓「c」更容易單獨被選中。 public var overridingScore: Double = 114_514 - // public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) } - /// 索引鍵陣列。 public private(set) var keyArray: [String] /// 幅位長度。 @@ -54,21 +52,22 @@ public extension Megrez.Compositor { } /// 該節點當前狀態所展示的鍵值配對。 - public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) } + public var currentPair: Megrez.KeyValuePaired { .init(keyArray: keyArray, value: value) } /// 做為預設雜湊函式。 /// - Parameter hasher: 目前物件的雜湊碼。 public func hash(into hasher: inout Hasher) { + hasher.combine(overridingScore) hasher.combine(keyArray) hasher.combine(spanLength) hasher.combine(unigrams) - hasher.combine(currentUnigramIndex) - hasher.combine(spanLength) hasher.combine(currentOverrideType) + hasher.combine(currentUnigramIndex) } public static func == (lhs: Node, rhs: Node) -> Bool { - lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength + lhs.overridingScore == rhs.overridingScore && lhs.spanLength == rhs.spanLength + && lhs.keyArray == rhs.keyArray && lhs.currentUnigramIndex == rhs.currentUnigramIndex && lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType } @@ -90,6 +89,25 @@ public extension Megrez.Compositor { currentOverrideType = .withNoOverrides } + /// 以指定字詞節點生成拷貝。 + /// - Remark: 因為 Node 不是 Struct,所以會在 Compositor 被拷貝的時候無法被真實複製。 + /// 這樣一來,Compositor 複製品當中的 Node 的變化會被反應到原先的 Compositor 身上。 + /// 這在某些情況下會造成意料之外的混亂情況,所以需要引入一個拷貝用的建構子。 + public init(node: Node) { + overridingScore = node.overridingScore + keyArray = node.keyArray + spanLength = node.spanLength + unigrams = node.unigrams + currentOverrideType = node.currentOverrideType + currentUnigramIndex = node.currentUnigramIndex + } + + /// 生成自身的拷貝。 + /// - Remark: 因為 Node 不是 Struct,所以會在 Compositor 被拷貝的時候無法被真實複製。 + /// 這樣一來,Compositor 複製品當中的 Node 的變化會被反應到原先的 Compositor 身上。 + /// 這在某些情況下會造成意料之外的混亂情況,所以需要引入一個拷貝用的建構子。 + public var copy: Node { .init(node: self) } + /// 檢查當前節點是否「讀音字長與候選字字長不一致」。 public var isReadingMismatched: Bool { keyArray.count != value.count } /// 該節點是否處於被覆寫的狀態。 @@ -162,7 +180,7 @@ public extension Megrez.Compositor { /// 節錨。在 Gramambular 2 當中又被稱為「NodeInSpan」。 struct NodeAnchor: Hashable { /// 節點。 - let node: Megrez.Compositor.Node + let node: Megrez.Node /// 幅位座標。 let spanIndex: Int /// 幅位長度。 @@ -185,7 +203,7 @@ public extension Megrez.Compositor { // MARK: - Array Extensions. -public extension Array where Element == Megrez.Compositor.Node { +public extension Array where Element == Megrez.Node { /// 從一個節點陣列當中取出目前的選字字串陣列。 var values: [String] { map(\.value) } @@ -204,7 +222,7 @@ public extension Array where Element == Megrez.Compositor.Node { var resultA = [Int: Int]() var resultB: [Int: Int] = [-1: 0] // 防呆 var cursorCounter = 0 - for (nodeCounter, neta) in enumerated() { + enumerated().forEach { nodeCounter, neta in resultA[nodeCounter] = cursorCounter neta.keyArray.forEach { _ in resultB[cursorCounter] = nodeCounter @@ -243,7 +261,7 @@ public extension Array where Element == Megrez.Compositor.Node { /// - cursor: 給定游標位置。 /// - outCursorPastNode: 找出的節點的前端位置。 /// - Returns: 查找結果。 - func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? { + func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Node? { guard !isEmpty else { return nil } let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) // 防呆 let range = contextRange(ofGivenCursor: cursor) @@ -255,7 +273,7 @@ public extension Array where Element == Megrez.Compositor.Node { /// 在陣列內以給定游標位置找出對應的節點。 /// - Parameter cursor: 給定游標位置。 /// - Returns: 查找結果。 - func findNode(at cursor: Int) -> Megrez.Compositor.Node? { + func findNode(at cursor: Int) -> Megrez.Node? { var useless = 0 return findNode(at: cursor, target: &useless) } diff --git a/Packages/vChewing_Megrez/Sources/Megrez/8_Unigram.swift b/Packages/vChewing_Megrez/Sources/Megrez/8_Unigram.swift index 5f056be7..cfccde05 100644 --- a/Packages/vChewing_Megrez/Sources/Megrez/8_Unigram.swift +++ b/Packages/vChewing_Megrez/Sources/Megrez/8_Unigram.swift @@ -48,8 +48,8 @@ public extension Array where Element == Megrez.Unigram { mutating func consolidate(filter theFilter: Set = .init()) { var inserted: [String: Double] = [:] var insertedArray: [Megrez.Unigram] = [] - for neta in filter({ !theFilter.contains($0.value) }) { - if inserted.keys.contains(neta.value) { continue } + filter { !theFilter.contains($0.value) }.forEach { neta in + if inserted.keys.contains(neta.value) { return } inserted[neta.value] = neta.score insertedArray.append(neta) } diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift index 61a1386a..70a9e61b 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/LMDataForTests.swift @@ -11,9 +11,9 @@ class SimpleLM: LangModelProtocol { var mutDatabase: [String: [Megrez.Unigram]] = [:] init(input: String, swapKeyValue: Bool = false) { let sstream = input.components(separatedBy: "\n") - for line in sstream { + sstream.forEach { line in if line.isEmpty || line.hasPrefix("#") { - continue + return } let linestream = line.split(separator: " ") let col0 = String(linestream[0]) diff --git a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift index 44e4c6b2..fe641775 100644 --- a/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift +++ b/Packages/vChewing_Megrez/Tests/MegrezTests/MegrezTests.swift @@ -11,43 +11,43 @@ import XCTest final class MegrezTests: XCTestCase { func test01_Span() throws { let langModel = SimpleLM(input: strSampleData) - let span = Megrez.Compositor.SpanUnit() - let n1 = Megrez.Compositor.Node( + var span = Megrez.SpanUnit() + let n1 = Megrez.Node( keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"]) ) - let n3 = Megrez.Compositor.Node( + let n3 = Megrez.Node( keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(keyArray: ["gao1ke1ji4"]) ) XCTAssertEqual(span.maxLength, 0) - span.append(node: n1) + span.addNode(node: n1) XCTAssertEqual(span.maxLength, 1) - span.append(node: n3) + span.addNode(node: n3) XCTAssertEqual(span.maxLength, 3) - XCTAssertEqual(span.nodeOf(length: 1), n1) - XCTAssertEqual(span.nodeOf(length: 2), nil) - XCTAssertEqual(span.nodeOf(length: 3), n3) - XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil) - span.clear() + XCTAssertEqual(span[1], n1) + XCTAssertEqual(span[2], nil) + XCTAssertEqual(span[3], n3) + XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil) + span.removeAll() XCTAssertEqual(span.maxLength, 0) - XCTAssertEqual(span.nodeOf(length: 1), nil) - XCTAssertEqual(span.nodeOf(length: 2), nil) - XCTAssertEqual(span.nodeOf(length: 3), nil) - XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil) + XCTAssertEqual(span[1], nil) + XCTAssertEqual(span[2], nil) + XCTAssertEqual(span[3], nil) + XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil) - span.append(node: n1) - span.append(node: n3) + span.addNode(node: n1) + span.addNode(node: n3) span.dropNodesOfOrBeyond(length: 2) XCTAssertEqual(span.maxLength, 1) - XCTAssertEqual(span.nodeOf(length: 1), n1) - XCTAssertEqual(span.nodeOf(length: 2), nil) - XCTAssertEqual(span.nodeOf(length: 3), nil) + XCTAssertEqual(span[1], n1) + XCTAssertEqual(span[2], nil) + XCTAssertEqual(span[3], nil) span.dropNodesOfOrBeyond(length: 1) XCTAssertEqual(span.maxLength, 0) - XCTAssertEqual(span.nodeOf(length: 1), nil) - let n114514 = Megrez.Compositor.Node(spanLength: 114_514) - XCTAssertFalse(span.append(node: n114514)) - XCTAssertNil(span.nodeOf(length: 0)) - XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1)) + XCTAssertEqual(span[1], nil) + let n114514 = Megrez.Node(spanLength: 114_514) + XCTAssertFalse(span.addNode(node: n114514)) + XCTAssertNil(span[0]) + XCTAssertNil(span[Megrez.Compositor.maxSpanLength + 1]) } func test02_RankedLangModel() throws { @@ -85,7 +85,7 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 1) XCTAssertEqual(compositor.spans.count, 1) XCTAssertEqual(compositor.spans[0].maxLength, 1) - guard let zeroNode = compositor.spans[0].nodeOf(length: 1) else { + guard let zeroNode = compositor.spans[0][1] else { print("fuckme") return } @@ -149,14 +149,14 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 3) XCTAssertEqual(compositor.spans.count, 3) XCTAssertEqual(compositor.spans[0].maxLength, 3) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;b;c") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b") + XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;b;c") XCTAssertEqual(compositor.spans[1].maxLength, 2) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b") + XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[2].maxLength, 1) - XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") + XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "c") } func test07_Compositor_SpanDeletionFromFront() throws { @@ -171,10 +171,10 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b") XCTAssertEqual(compositor.spans[1].maxLength, 1) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b") } func test08_Compositor_SpanDeletionFromMiddle() throws { @@ -190,10 +190,10 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c") XCTAssertEqual(compositor.spans[1].maxLength, 1) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c") compositor.clear() compositor.insertKey("a") @@ -206,10 +206,10 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c") XCTAssertEqual(compositor.spans[1].maxLength, 1) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c") } func test09_Compositor_SpanDeletionFromRear() throws { @@ -226,10 +226,10 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "b") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[1].maxLength, 1) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c") } func test10_Compositor_SpanInsertion() throws { @@ -245,19 +245,19 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.length, 4) XCTAssertEqual(compositor.spans.count, 4) XCTAssertEqual(compositor.spans[0].maxLength, 4) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;X") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;X;b") - XCTAssertEqual(compositor.spans[0].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "a;X;b;c") + XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a") + XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;X") + XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;X;b") + XCTAssertEqual(compositor.spans[0][4]?.keyArray.joined(separator: compositor.separator), "a;X;b;c") XCTAssertEqual(compositor.spans[1].maxLength, 3) - XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "X") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "X;b") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "X;b;c") + XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "X") + XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "X;b") + XCTAssertEqual(compositor.spans[1][3]?.keyArray.joined(separator: compositor.separator), "X;b;c") XCTAssertEqual(compositor.spans[2].maxLength, 2) - XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") - XCTAssertEqual(compositor.spans[2].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") + XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "b") + XCTAssertEqual(compositor.spans[2][2]?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[3].maxLength, 1) - XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") + XCTAssertEqual(compositor.spans[3][1]?.keyArray.joined(separator: compositor.separator), "c") } func test11_Compositor_LongGridDeletion() throws { @@ -282,17 +282,17 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.cursor, 6) XCTAssertEqual(compositor.length, 13) XCTAssertEqual(compositor.spans.count, 13) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefh") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "bcdef") - XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefhi") - XCTAssertEqual(compositor.spans[2].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "cdefh") - XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defhij") - XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efhijk") - XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fhijkl") - XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm") - XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "ijklmn") - XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn") + XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef") + XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefh") + XCTAssertEqual(compositor.spans[1][5]?.keyArray.joined(separator: compositor.separator), "bcdef") + XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefhi") + XCTAssertEqual(compositor.spans[2][5]?.keyArray.joined(separator: compositor.separator), "cdefh") + XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defhij") + XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efhijk") + XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fhijkl") + XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "hijklm") + XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "ijklmn") + XCTAssertEqual(compositor.spans[8][5]?.keyArray.joined(separator: compositor.separator), "jklmn") } func test12_Compositor_LongGridInsertion() throws { @@ -317,25 +317,25 @@ final class MegrezTests: XCTestCase { XCTAssertEqual(compositor.cursor, 8) XCTAssertEqual(compositor.length, 15) XCTAssertEqual(compositor.spans.count, 15) - XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef") - XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefg") - XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefgX") - XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defgXh") - XCTAssertEqual(compositor.spans[3].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "defgX") - XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efgXhi") - XCTAssertEqual(compositor.spans[4].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "efgXh") - XCTAssertEqual(compositor.spans[4].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "efgX") - XCTAssertEqual(compositor.spans[4].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "efg") - XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fgXhij") - XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "gXhijk") - XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "Xhijkl") - XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm") + XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef") + XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefg") + XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefgX") + XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defgXh") + XCTAssertEqual(compositor.spans[3][5]?.keyArray.joined(separator: compositor.separator), "defgX") + XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efgXhi") + XCTAssertEqual(compositor.spans[4][5]?.keyArray.joined(separator: compositor.separator), "efgXh") + XCTAssertEqual(compositor.spans[4][4]?.keyArray.joined(separator: compositor.separator), "efgX") + XCTAssertEqual(compositor.spans[4][3]?.keyArray.joined(separator: compositor.separator), "efg") + XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fgXhij") + XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "gXhijk") + XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "Xhijkl") + XCTAssertEqual(compositor.spans[8][6]?.keyArray.joined(separator: compositor.separator), "hijklm") } func test13_Compositor_StressBench() throws { NSLog("// Stress test preparation begins.") var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData)) - for _ in 0 ..< 1919 { + (0 ..< 1919).forEach { _ in compositor.insertKey("yi") } NSLog("// Stress test started.") @@ -348,8 +348,8 @@ final class MegrezTests: XCTestCase { func test14_Compositor_WordSegmentation() throws { var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true)) compositor.separator = "" - for i in "高科技公司的年終獎金" { - compositor.insertKey(String(i)) + "高科技公司的年終獎金".forEach { i in + compositor.insertKey(i.description) } let result = compositor.walk().0 XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "的", "年終", "獎金"]) @@ -546,4 +546,17 @@ final class MegrezTests: XCTestCase { print(newResult2) XCTAssertEqual(newResult2, ["年", "中"]) } + + func test21_Compositor_hardCopy() throws { + let theLM = SimpleLM(input: strSampleData) + let rawReadings = "gao1 ke1 ji4 gong1 si1 de5 nian2 zhong1 jiang3 jin1" + var compositorA = Megrez.Compositor(with: theLM) + rawReadings.split(separator: " ").forEach { key in + compositorA.insertKey(key.description) + } + var compositorB = compositorA.hardCopy + let resultA = compositorA.walk().walkedNodes + let resultB = compositorB.walk().walkedNodes + XCTAssertEqual(resultA, resultB) + } }