Megrez // Compositor refactoration with hard copy support.

This commit is contained in:
ShikiSuen 2023-03-09 22:28:06 +08:00
parent 22ead33ba9
commit 490a646f88
9 changed files with 291 additions and 243 deletions

View File

@ -66,6 +66,26 @@ public extension Megrez {
self.separator = separator self.separator = separator
} }
/// Creates a new compositor whose state is duplicated from `target`.
///
/// `cursor`, `marker`, `separator`, `keys` and `langModel` are assigned as-is.
/// `walkedNodes` is rebuilt from each node's `copy`, and `spans` from each
/// span's `hardCopy`, so the new compositor gets its own node instances.
/// - Remark: NOTE(review): presumably needed because `Node` is not a struct, so
///   a plain assignment of a compositor would share its nodes with the
///   original — confirm against the `Node` declaration.
/// - Parameter target: The compositor to duplicate.
public init(from target: Compositor) {
  // Plain state first; these assignments are independent of each other.
  separator = target.separator
  langModel = target.langModel
  keys = target.keys
  cursor = target.cursor
  marker = target.marker
  // Node-bearing containers are duplicated element by element.
  spans = target.spans.map { $0.hardCopy }
  walkedNodes = target.walkedNodes.map { $0.copy }
}
/// A duplicate of this compositor, produced by `init(from:)`.
/// - Remark: NOTE(review): presumably exists because `Node` is not a struct,
///   so nodes would otherwise be shared between the copies — confirm.
public var hardCopy: Compositor {
  Compositor(from: self)
}
/// ///
/// ///
/// ///
@ -167,21 +187,19 @@ public extension Megrez {
public var dumpDOT: String { public var dumpDOT: String {
// C# StringBuilder Swift NSMutableString // C# StringBuilder Swift NSMutableString
let strOutput: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n") let strOutput: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n")
for (p, span) in spans.enumerated() { spans.enumerated().forEach { p, span in
for ni in 0 ... (span.maxLength) { (0 ... span.maxLength).forEach { ni in
guard let np = span.nodeOf(length: ni) else { continue } guard let np = span[ni] else { return }
if p == 0 { if p == 0 { strOutput.append("BOS -> \(np.value);\n") }
strOutput.append("BOS -> \(np.value);\n")
}
strOutput.append("\(np.value);\n") strOutput.append("\(np.value);\n")
if (p + ni) < spans.count { if (p + ni) < spans.count {
let destinationSpan = spans[p + ni] let destinationSpan = spans[p + ni]
for q in 0 ... (destinationSpan.maxLength) { (0 ... destinationSpan.maxLength).forEach { q in
guard let dn = destinationSpan.nodeOf(length: q) else { continue } guard let dn = destinationSpan[q] else { return }
strOutput.append(np.value + " -> " + dn.value + ";\n") strOutput.append(np.value + " -> " + dn.value + ";\n")
} }
} }
guard (p + ni) == spans.count else { continue } guard (p + ni) == spans.count else { return }
strOutput.append(np.value + " -> EOS;\n") strOutput.append(np.value + " -> EOS;\n")
} }
} }
@ -198,11 +216,11 @@ extension Megrez.Compositor {
/// - Parameters: /// - Parameters:
/// - location: /// - location:
/// - action: /// - action:
mutating func resizeGrid(at location: Int, do action: ResizeBehavior) { private mutating func resizeGrid(at location: Int, do action: ResizeBehavior) {
let location = max(min(location, spans.count), 0) // let location = max(min(location, spans.count), 0) //
switch action { switch action {
case .expand: case .expand:
spans.insert(SpanUnit(), at: location) spans.insert(.init(), at: location)
if [0, spans.count].contains(location) { return } if [0, spans.count].contains(location) { return }
case .shrink: case .shrink:
if spans.count == location { return } if spans.count == location { return }
@ -248,60 +266,54 @@ extension Megrez.Compositor {
let affectedLength = Megrez.Compositor.maxSpanLength - 1 let affectedLength = Megrez.Compositor.maxSpanLength - 1
let begin = max(0, location - affectedLength) let begin = max(0, location - affectedLength)
guard location >= begin else { return } guard location >= begin else { return }
for i in begin ..< location { (begin ..< location).forEach { delta in
spans[i].dropNodesOfOrBeyond(length: location - i + 1) ((location - delta + 1) ... Self.maxSpanLength).forEach { theLength in
spans[delta][theLength] = nil
}
} }
} }
/// ///
/// - Parameter range: /// - Parameter range:
/// - Returns: /// - Returns:
func getJoinedKeyArray(range: Range<Int>) -> [String] { private func getJoinedKeyArray(range: Range<Int>) -> [String] {
// contains macOS 13 Ventura // contains macOS 13 Ventura
guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] } guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] }
return keys[range].map(\.description) return keys[range].map(\.description)
} }
///
/// - Parameters:
/// - location:
/// - length:
/// - keyArray:
/// - Returns: nil
func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? {
let location = max(min(location, spans.count - 1), 0) //
guard let node = spans[location].nodeOf(length: length) else { return nil }
return keyArray == node.keyArray ? node : nil
}
/// ///
/// - Parameter updateExisting: /// - Parameter updateExisting:
/// ///
/// - Returns: 0 /// - Returns: 0
@discardableResult public mutating func update(updateExisting: Bool = false) -> Int { @discardableResult public mutating func update(updateExisting: Bool = false) -> Int {
let maxSpanLength = Megrez.Compositor.maxSpanLength let maxSpanLength = Megrez.Compositor.maxSpanLength
let range = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count) let rangeOfPositions = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count)
var nodesChanged = 0 var nodesChanged = 0
for position in range { rangeOfPositions.forEach { position in
for theLength in 1 ... min(maxSpanLength, range.upperBound - position) { let rangeOfLengths = 1 ... min(maxSpanLength, rangeOfPositions.upperBound - position)
let joinedKeyArray = getJoinedKeyArray(range: position ..< (position + theLength)) rangeOfLengths.forEach { theLength in
if let theNode = getNode(at: position, length: theLength, keyArray: joinedKeyArray) { guard position + theLength <= keys.count, position >= 0 else { return }
if !updateExisting { continue } let joinedKeyArray = keys[position ..< (position + theLength)].map(\.description)
if let theNode = spans[position][theLength] {
if !updateExisting { return }
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray) let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
// //
if unigrams.isEmpty { if unigrams.isEmpty {
if theNode.keyArray.count == 1 { continue } if theNode.keyArray.count == 1 { return }
spans[position].nullify(node: theNode) spans[position][theNode.spanLength] = nil
} else { } else {
theNode.syncingUnigrams(from: unigrams) theNode.syncingUnigrams(from: unigrams)
} }
nodesChanged += 1 nodesChanged += 1
continue return
} }
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray) let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
guard !unigrams.isEmpty else { continue } guard !unigrams.isEmpty else { return }
spans[position].append( // SpanUnit.addNode
node: .init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams) spans[position][theLength] = .init(
keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams
) )
nodesChanged += 1 nodesChanged += 1
} }

View File

@ -13,37 +13,34 @@ public extension Megrez.Compositor {
/// `G = (V, E)` `O(|V|+|E|)` `G` /// `G = (V, E)` `O(|V|+|E|)` `G`
/// 使 /// 使
/// - Returns: /// - Returns:
@discardableResult mutating func walk() -> (walkedNode: [Node], succeeded: Bool) { @discardableResult mutating func walk() -> (walkedNodes: [Megrez.Node], succeeded: Bool) {
var result = [Node]() var result = [Megrez.Node]()
defer { walkedNodes = result } defer { walkedNodes = result }
guard !spans.isEmpty else { return (result, true) } guard !spans.isEmpty else { return (result, true) }
var vertexSpans = [[Vertex]]() var vertexSpans = [[Vertex]]()
for _ in spans { spans.forEach { _ in
vertexSpans.append(.init()) vertexSpans.append(.init())
} }
for (i, span) in spans.enumerated() { spans.enumerated().forEach { i, span in
for j in 1 ... max(span.maxLength, 1) { (1 ... max(span.maxLength, 1)).forEach { j in
if let theNode = span.nodeOf(length: j) { guard let theNode = span[j] else { return }
vertexSpans[i].append(.init(node: theNode)) vertexSpans[i].append(.init(node: theNode))
} }
} }
}
let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"])) let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"]))
var root = Vertex(node: .init(keyArray: ["_ROOT_"])) var root = Vertex(node: .init(keyArray: ["_ROOT_"]))
for (i, vertexSpan) in vertexSpans.enumerated() { vertexSpans.enumerated().forEach { i, vertexSpan in
for vertex in vertexSpan { vertexSpan.forEach { vertex in
let nextVertexPosition = i + vertex.node.spanLength let nextVertexPosition = i + vertex.node.spanLength
if nextVertexPosition == vertexSpans.count { if nextVertexPosition == vertexSpans.count {
vertex.edges.append(terminal) vertex.edges.append(terminal)
continue return
}
for nextVertex in vertexSpans[nextVertexPosition] {
vertex.edges.append(nextVertex)
} }
vertexSpans[nextVertexPosition].forEach { vertex.edges.append($0) }
} }
} }
@ -51,15 +48,13 @@ public extension Megrez.Compositor {
root.edges.append(contentsOf: vertexSpans[0]) root.edges.append(contentsOf: vertexSpans[0])
var ordered = topologicalSort(root: &root) var ordered = topologicalSort(root: &root)
for (j, neta) in ordered.reversed().enumerated() { ordered.reversed().enumerated().forEach { j, neta in
for (k, _) in neta.edges.enumerated() { neta.edges.indices.forEach { relax(u: neta, v: &neta.edges[$0]) }
relax(u: neta, v: &neta.edges[k])
}
ordered[j] = neta ordered[j] = neta
} }
var iterated = terminal var iterated = terminal
var walked = [Node]() var walked = [Megrez.Node]()
var totalLengthOfKeys = 0 var totalLengthOfKeys = 0
while let itPrev = iterated.prev { while let itPrev = iterated.prev {

View File

@ -5,7 +5,7 @@
import Foundation import Foundation
public extension Megrez.Compositor { public extension Megrez {
/// ///
struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible { struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
/// ///
@ -18,6 +18,8 @@ public extension Megrez.Compositor {
public var isValid: Bool { !keyArray.joined().isEmpty && !value.isEmpty } public var isValid: Bool { !keyArray.joined().isEmpty && !value.isEmpty }
/// () /// ()
public var toNGramKey: String { !isValid ? "()" : "(" + joinedKey() + "," + value + ")" } public var toNGramKey: String { !isValid ? "()" : "(" + joinedKey() + "," + value + ")" }
/// This pair expressed as a labelled tuple of its key array and its value.
public var tupletExpression: (keyArray: [String], value: String) {
  (keyArray: keyArray, value: value)
}
/// ///
/// - Parameters: /// - Parameters:
@ -28,6 +30,13 @@ public extension Megrez.Compositor {
self.value = value.isEmpty ? "N/A" : value self.value = value.isEmpty ? "N/A" : value
} }
/// Creates a pair from a labelled tuple.
///
/// An empty key array is replaced by `["N/A"]` and an empty value by `"N/A"`.
/// - Parameter tupletExpression: The key array and value to store.
public init(_ tupletExpression: (keyArray: [String], value: String)) {
  let (givenKeyArray, givenValue) = tupletExpression
  keyArray = givenKeyArray.isEmpty ? ["N/A"] : givenKeyArray
  value = givenValue.isEmpty ? "N/A" : givenValue
}
/// ///
/// - Parameters: /// - Parameters:
/// - key: /// - key:
@ -72,7 +81,9 @@ public extension Megrez.Compositor {
|| (lhs.keyArray.count == rhs.keyArray.count && lhs.value >= rhs.value) || (lhs.keyArray.count == rhs.keyArray.count && lhs.value >= rhs.value)
} }
} }
}
public extension Megrez.Compositor {
/// ///
/// - all: 穿 /// - all: 穿
/// - beginAt: /// - beginAt:
@ -84,8 +95,8 @@ public extension Megrez.Compositor {
/// location - 1 /// location - 1
/// - Parameter location: /// - Parameter location:
/// - Returns: /// - Returns:
func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [KeyValuePaired] { func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Megrez.KeyValuePaired] {
var result = [KeyValuePaired]() var result = [Megrez.KeyValuePaired]()
guard !keys.isEmpty else { return result } guard !keys.isEmpty else { return result }
let location = max(min(location, keys.count - 1), 0) // let location = max(min(location, keys.count - 1), 0) //
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted { let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
@ -93,17 +104,16 @@ public extension Megrez.Compositor {
$0.spanLength > $1.spanLength $0.spanLength > $1.spanLength
} }
let keyAtCursor = keys[location] let keyAtCursor = keys[location]
for theNode in anchors.map(\.node) { anchors.map(\.node).filter(\.keyArray.isEmpty.negative).forEach { theNode in
if theNode.keyArray.isEmpty { continue } theNode.unigrams.forEach { gram in
for gram in theNode.unigrams {
switch filter { switch filter {
case .all: case .all:
// //
if !theNode.keyArray.contains(keyAtCursor) { continue } if !theNode.keyArray.contains(keyAtCursor) { return }
case .beginAt: case .beginAt:
if theNode.keyArray[0] != keyAtCursor { continue } if theNode.keyArray[0] != keyAtCursor { return }
case .endAt: case .endAt:
if theNode.keyArray.reversed()[0] != keyAtCursor { continue } if theNode.keyArray.reversed()[0] != keyAtCursor { return }
} }
result.append(.init(keyArray: theNode.keyArray, value: gram.value)) result.append(.init(keyArray: theNode.keyArray, value: gram.value))
} }
@ -120,7 +130,7 @@ public extension Megrez.Compositor {
/// - overrideType: /// - overrideType:
/// - Returns: /// - Returns:
@discardableResult func overrideCandidate( @discardableResult func overrideCandidate(
_ candidate: KeyValuePaired, at location: Int, overrideType: Node.OverrideType = .withHighScore _ candidate: Megrez.KeyValuePaired, at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore
) )
-> Bool -> Bool
{ {
@ -137,7 +147,7 @@ public extension Megrez.Compositor {
/// - Returns: /// - Returns:
@discardableResult func overrideCandidateLiteral( @discardableResult func overrideCandidateLiteral(
_ candidate: String, _ candidate: String,
at location: Int, overrideType: Node.OverrideType = .withHighScore at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore
) -> Bool { ) -> Bool {
overrideCandidateAgainst(keyArray: nil, at: location, value: candidate, type: overrideType) overrideCandidateAgainst(keyArray: nil, at: location, value: candidate, type: overrideType)
} }
@ -151,7 +161,7 @@ public extension Megrez.Compositor {
/// - value: /// - value:
/// - type: /// - type:
/// - Returns: /// - Returns:
internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Node.OverrideType) internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Megrez.Node.OverrideType)
-> Bool -> Bool
{ {
let location = max(min(location, keys.count), 0) // let location = max(min(location, keys.count), 0) //
@ -166,18 +176,18 @@ public extension Megrez.Compositor {
guard let overridden = overridden else { return false } // guard let overridden = overridden else { return false } //
for i in overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength) { (overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength)).forEach { i in
/// A BC /// A BC
/// A BC 使 A /// A BC 使 A
/// DEF BC A /// DEF BC A
arrOverlappedNodes = fetchOverlappingNodes(at: i) arrOverlappedNodes = fetchOverlappingNodes(at: i)
for anchor in arrOverlappedNodes { arrOverlappedNodes.forEach { anchor in
if anchor.node == overridden.node { continue } if anchor.node == overridden.node { return }
if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t")) if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t"))
|| !overridden.node.value.contains(anchor.node.value) || !overridden.node.value.contains(anchor.node.value)
{ {
anchor.node.reset() anchor.node.reset()
continue return
} }
anchor.node.overridingScore /= 4 anchor.node.overridingScore /= 4
} }
@ -208,3 +218,9 @@ private extension Sequence {
.map(\.element) .map(\.element)
} }
} }
// MARK: - Bool Extension (Private)
extension Bool {
  /// The logical negation of this value, available as a property so that it
  /// can appear in a key path (e.g. `\.keyArray.isEmpty.negative`).
  var negative: Bool {
    self ? false : true
  }
}

View File

@ -3,90 +3,84 @@
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor { public extension Megrez {
/// /// [: ]
public class SpanUnit { typealias SpanUnit = [Int: Node]
/// nil }
public var nodes: [Int: Node] = [:]
public extension Megrez.SpanUnit {
/// Creates a span unit holding a `copy` of every node in `target`, stored
/// under the same key as in `target`.
/// - Remark: NOTE(review): presumably needed because `Node` is not a struct,
///   so copying the dictionary alone would share node instances — confirm.
/// - Parameter target: The span unit whose nodes are duplicated.
init(SpanUnit target: Megrez.SpanUnit) {
  self.init()
  for (theLength, theNode) in target {
    self[theLength] = theNode.copy
  }
}
/// A duplicate of this span unit in which every node has been copied.
var hardCopy: Megrez.SpanUnit {
  Megrez.SpanUnit(SpanUnit: self)
}
// MARK: - Dynamic Variables
/// ///
/// ///
public var maxLength: Int { nodes.keys.max() ?? 0 } var maxLength: Int { keys.max() ?? 0 }
/// Megrez.Compositor.maxSpanLength /// Megrez.Compositor.maxSpanLength
private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }
/// ///
private var allowedLengths: ClosedRange<Int> { 1 ... maxSpanLength } private var allowedLengths: ClosedRange<Int> { 1 ... maxSpanLength }
/// // MARK: - Functions
public init() {
clear()
}
/// 0
public func clear() {
nodes.removeAll()
}
/// ///
/// - Remark:
/// - Parameter node: /// - Parameter node:
/// - Returns: /// - Returns:
@discardableResult public func append(node: Node) -> Bool { @discardableResult mutating func addNode(node: Megrez.Node) -> Bool {
guard allowedLengths.contains(node.spanLength) else { return false } guard allowedLengths.contains(node.spanLength) else { return false }
nodes[node.spanLength] = node self[node.spanLength] = node
return true return true
} }
///
/// - Remark: Swift C#
///
///
/// - Parameter node:
public func nullify(node givenNode: Node) {
let spanLength = givenNode.spanLength
nodes[spanLength] = nil
}
/// ///
/// - Remark:
/// - Parameter length: /// - Parameter length:
/// - Returns: /// - Returns:
@discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { @discardableResult mutating func dropNodesOfOrBeyond(length: Int) -> Bool {
guard allowedLengths.contains(length) else { return false } guard allowedLengths.contains(length) else { return false }
let length = min(length, maxSpanLength) let length = Swift.min(length, maxSpanLength)
(length ... maxSpanLength).forEach { nodes[$0] = nil } (length ... maxSpanLength).forEach { self[$0] = nil }
return true return true
} }
}
/// // MARK: - Related Compositor Implementations.
/// - Parameter length:
/// - Returns:
public func nodeOf(length: Int) -> Node? {
guard allowedLengths.contains(length) else { return nil }
return nodes[length]
}
}
// MARK: Internal implementations.
extension Megrez.Compositor {
/// ///
/// - Parameter location: /// - Parameter location:
/// - Returns: /// - Returns:
internal func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] { func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] {
var results = [NodeAnchor]() var results = [NodeAnchor]()
guard !spans.isEmpty, location < spans.count else { return results } guard !spans.isEmpty, givenLocation < spans.count else { return results }
// //
for theLocation in 1 ... spans[location].maxLength { (1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in
guard let node = spans[location].nodeOf(length: theLocation) else { continue } guard let node = spans[givenLocation][theSpanLength] else { return }
results.append(.init(node: node, spanIndex: location)) results.append(.init(node: node, spanIndex: givenLocation))
} }
// //
let begin: Int = location - min(location, Megrez.Compositor.maxSpanLength - 1) let begin: Int = givenLocation - min(givenLocation, Megrez.Compositor.maxSpanLength - 1)
for theLocation in begin ..< location { (begin ..< givenLocation).forEach { theLocation in
let (A, B): (Int, Int) = (location - theLocation + 1, spans[theLocation].maxLength) let (A, B): (Int, Int) = (givenLocation - theLocation + 1, spans[theLocation].maxLength)
guard A <= B else { continue } guard A <= B else { return }
for theLength in A ... B { (A ... B).forEach { theLength in
guard let node = spans[theLocation].nodeOf(length: theLength) else { continue } guard let node = spans[theLocation][theLength] else { return }
results.append(.init(node: node, spanIndex: theLocation)) results.append(.init(node: node, spanIndex: theLocation))
} }
} }

View File

@ -20,13 +20,13 @@ extension Megrez.Compositor {
/// ///
public var topologicallySorted = false public var topologicallySorted = false
/// ///
public var node: Node public var node: Megrez.Node
/// ///
/// ///
/// ///
/// - Parameter node: /// - Parameter node:
public init(node: Node) { public init(node: Megrez.Node) {
self.node = node self.node = node
} }
@ -65,7 +65,7 @@ extension Megrez.Compositor {
/// ///
/// ``` /// ```
/// func topologicalSort(vertex: Vertex) { /// func topologicalSort(vertex: Vertex) {
/// for vertexNode in vertex.edges { /// vertex.edges.forEach {vertexNode in
/// if !vertexNode.topologicallySorted { /// if !vertexNode.topologicallySorted {
/// dfs(vertexNode, result) /// dfs(vertexNode, result)
/// vertexNode.topologicallySorted = true /// vertexNode.topologicallySorted = true

View File

@ -5,7 +5,7 @@
import Foundation import Foundation
public extension Megrez.Compositor { public extension Megrez {
/// ///
/// ///
/// ///
@ -38,8 +38,6 @@ public extension Megrez.Compositor {
/// c /// c
public var overridingScore: Double = 114_514 public var overridingScore: Double = 114_514
// public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }
/// ///
public private(set) var keyArray: [String] public private(set) var keyArray: [String]
/// ///
@ -54,21 +52,22 @@ public extension Megrez.Compositor {
} }
/// ///
public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) } public var currentPair: Megrez.KeyValuePaired { .init(keyArray: keyArray, value: value) }
/// ///
/// - Parameter hasher: /// - Parameter hasher:
public func hash(into hasher: inout Hasher) { public func hash(into hasher: inout Hasher) {
hasher.combine(overridingScore)
hasher.combine(keyArray) hasher.combine(keyArray)
hasher.combine(spanLength) hasher.combine(spanLength)
hasher.combine(unigrams) hasher.combine(unigrams)
hasher.combine(currentUnigramIndex)
hasher.combine(spanLength)
hasher.combine(currentOverrideType) hasher.combine(currentOverrideType)
hasher.combine(currentUnigramIndex)
} }
public static func == (lhs: Node, rhs: Node) -> Bool { public static func == (lhs: Node, rhs: Node) -> Bool {
lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength lhs.overridingScore == rhs.overridingScore && lhs.spanLength == rhs.spanLength
&& lhs.keyArray == rhs.keyArray && lhs.currentUnigramIndex == rhs.currentUnigramIndex
&& lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType && lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType
} }
@ -90,6 +89,25 @@ public extension Megrez.Compositor {
currentOverrideType = .withNoOverrides currentOverrideType = .withNoOverrides
} }
/// Creates a new node carrying the same state as `node`.
///
/// Copies `keyArray`, `spanLength`, `unigrams`, `currentUnigramIndex`,
/// `currentOverrideType` and `overridingScore` from the given node.
/// - Remark: NOTE(review): presumably exists because `Node` is not a struct,
///   so a compositor copy would otherwise keep pointing at the same nodes —
///   confirm against the type declaration.
/// - Parameter node: The node to duplicate.
public init(node: Node) {
  // Identity of the node: its keys, span length and candidate unigrams.
  keyArray = node.keyArray
  spanLength = node.spanLength
  unigrams = node.unigrams
  // Current selection / override state.
  currentUnigramIndex = node.currentUnigramIndex
  currentOverrideType = node.currentOverrideType
  overridingScore = node.overridingScore
}
/// A new node with the same state as this one, produced by `init(node:)`.
/// - Remark: NOTE(review): presumably exists because `Node` is not a struct,
///   so assignment alone would not duplicate it — confirm.
public var copy: Node {
  Node(node: self)
}
/// ///
public var isReadingMismatched: Bool { keyArray.count != value.count } public var isReadingMismatched: Bool { keyArray.count != value.count }
/// ///
@ -162,7 +180,7 @@ public extension Megrez.Compositor {
/// Gramambular 2 NodeInSpan /// Gramambular 2 NodeInSpan
struct NodeAnchor: Hashable { struct NodeAnchor: Hashable {
/// ///
let node: Megrez.Compositor.Node let node: Megrez.Node
/// ///
let spanIndex: Int let spanIndex: Int
/// ///
@ -185,7 +203,7 @@ public extension Megrez.Compositor {
// MARK: - Array Extensions. // MARK: - Array Extensions.
public extension Array where Element == Megrez.Compositor.Node { public extension Array where Element == Megrez.Node {
/// ///
var values: [String] { map(\.value) } var values: [String] { map(\.value) }
@ -204,7 +222,7 @@ public extension Array where Element == Megrez.Compositor.Node {
var resultA = [Int: Int]() var resultA = [Int: Int]()
var resultB: [Int: Int] = [-1: 0] // var resultB: [Int: Int] = [-1: 0] //
var cursorCounter = 0 var cursorCounter = 0
for (nodeCounter, neta) in enumerated() { enumerated().forEach { nodeCounter, neta in
resultA[nodeCounter] = cursorCounter resultA[nodeCounter] = cursorCounter
neta.keyArray.forEach { _ in neta.keyArray.forEach { _ in
resultB[cursorCounter] = nodeCounter resultB[cursorCounter] = nodeCounter
@ -243,7 +261,7 @@ public extension Array where Element == Megrez.Compositor.Node {
/// - cursor: /// - cursor:
/// - outCursorPastNode: /// - outCursorPastNode:
/// - Returns: /// - Returns:
func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? { func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Node? {
guard !isEmpty else { return nil } guard !isEmpty else { return nil }
let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) // let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) //
let range = contextRange(ofGivenCursor: cursor) let range = contextRange(ofGivenCursor: cursor)
@ -255,7 +273,7 @@ public extension Array where Element == Megrez.Compositor.Node {
/// ///
/// - Parameter cursor: /// - Parameter cursor:
/// - Returns: /// - Returns:
func findNode(at cursor: Int) -> Megrez.Compositor.Node? { func findNode(at cursor: Int) -> Megrez.Node? {
var useless = 0 var useless = 0
return findNode(at: cursor, target: &useless) return findNode(at: cursor, target: &useless)
} }

View File

@ -48,8 +48,8 @@ public extension Array where Element == Megrez.Unigram {
mutating func consolidate(filter theFilter: Set<String> = .init()) { mutating func consolidate(filter theFilter: Set<String> = .init()) {
var inserted: [String: Double] = [:] var inserted: [String: Double] = [:]
var insertedArray: [Megrez.Unigram] = [] var insertedArray: [Megrez.Unigram] = []
for neta in filter({ !theFilter.contains($0.value) }) { filter { !theFilter.contains($0.value) }.forEach { neta in
if inserted.keys.contains(neta.value) { continue } if inserted.keys.contains(neta.value) { return }
inserted[neta.value] = neta.score inserted[neta.value] = neta.score
insertedArray.append(neta) insertedArray.append(neta)
} }

View File

@ -11,9 +11,9 @@ class SimpleLM: LangModelProtocol {
var mutDatabase: [String: [Megrez.Unigram]] = [:] var mutDatabase: [String: [Megrez.Unigram]] = [:]
init(input: String, swapKeyValue: Bool = false) { init(input: String, swapKeyValue: Bool = false) {
let sstream = input.components(separatedBy: "\n") let sstream = input.components(separatedBy: "\n")
for line in sstream { sstream.forEach { line in
if line.isEmpty || line.hasPrefix("#") { if line.isEmpty || line.hasPrefix("#") {
continue return
} }
let linestream = line.split(separator: " ") let linestream = line.split(separator: " ")
let col0 = String(linestream[0]) let col0 = String(linestream[0])

View File

@ -11,43 +11,43 @@ import XCTest
final class MegrezTests: XCTestCase { final class MegrezTests: XCTestCase {
func test01_Span() throws { func test01_Span() throws {
let langModel = SimpleLM(input: strSampleData) let langModel = SimpleLM(input: strSampleData)
let span = Megrez.Compositor.SpanUnit() var span = Megrez.SpanUnit()
let n1 = Megrez.Compositor.Node( let n1 = Megrez.Node(
keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"]) keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"])
) )
let n3 = Megrez.Compositor.Node( let n3 = Megrez.Node(
keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(keyArray: ["gao1ke1ji4"]) keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(keyArray: ["gao1ke1ji4"])
) )
XCTAssertEqual(span.maxLength, 0) XCTAssertEqual(span.maxLength, 0)
span.append(node: n1) span.addNode(node: n1)
XCTAssertEqual(span.maxLength, 1) XCTAssertEqual(span.maxLength, 1)
span.append(node: n3) span.addNode(node: n3)
XCTAssertEqual(span.maxLength, 3) XCTAssertEqual(span.maxLength, 3)
XCTAssertEqual(span.nodeOf(length: 1), n1) XCTAssertEqual(span[1], n1)
XCTAssertEqual(span.nodeOf(length: 2), nil) XCTAssertEqual(span[2], nil)
XCTAssertEqual(span.nodeOf(length: 3), n3) XCTAssertEqual(span[3], n3)
XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil) XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil)
span.clear() span.removeAll()
XCTAssertEqual(span.maxLength, 0) XCTAssertEqual(span.maxLength, 0)
XCTAssertEqual(span.nodeOf(length: 1), nil) XCTAssertEqual(span[1], nil)
XCTAssertEqual(span.nodeOf(length: 2), nil) XCTAssertEqual(span[2], nil)
XCTAssertEqual(span.nodeOf(length: 3), nil) XCTAssertEqual(span[3], nil)
XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil) XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil)
span.append(node: n1) span.addNode(node: n1)
span.append(node: n3) span.addNode(node: n3)
span.dropNodesOfOrBeyond(length: 2) span.dropNodesOfOrBeyond(length: 2)
XCTAssertEqual(span.maxLength, 1) XCTAssertEqual(span.maxLength, 1)
XCTAssertEqual(span.nodeOf(length: 1), n1) XCTAssertEqual(span[1], n1)
XCTAssertEqual(span.nodeOf(length: 2), nil) XCTAssertEqual(span[2], nil)
XCTAssertEqual(span.nodeOf(length: 3), nil) XCTAssertEqual(span[3], nil)
span.dropNodesOfOrBeyond(length: 1) span.dropNodesOfOrBeyond(length: 1)
XCTAssertEqual(span.maxLength, 0) XCTAssertEqual(span.maxLength, 0)
XCTAssertEqual(span.nodeOf(length: 1), nil) XCTAssertEqual(span[1], nil)
let n114514 = Megrez.Compositor.Node(spanLength: 114_514) let n114514 = Megrez.Node(spanLength: 114_514)
XCTAssertFalse(span.append(node: n114514)) XCTAssertFalse(span.addNode(node: n114514))
XCTAssertNil(span.nodeOf(length: 0)) XCTAssertNil(span[0])
XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1)) XCTAssertNil(span[Megrez.Compositor.maxSpanLength + 1])
} }
func test02_RankedLangModel() throws { func test02_RankedLangModel() throws {
@ -85,7 +85,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 1) XCTAssertEqual(compositor.length, 1)
XCTAssertEqual(compositor.spans.count, 1) XCTAssertEqual(compositor.spans.count, 1)
XCTAssertEqual(compositor.spans[0].maxLength, 1) XCTAssertEqual(compositor.spans[0].maxLength, 1)
guard let zeroNode = compositor.spans[0].nodeOf(length: 1) else { guard let zeroNode = compositor.spans[0][1] else {
print("fuckme") print("fuckme")
return return
} }
@ -149,14 +149,14 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 3) XCTAssertEqual(compositor.length, 3)
XCTAssertEqual(compositor.spans.count, 3) XCTAssertEqual(compositor.spans.count, 3)
XCTAssertEqual(compositor.spans[0].maxLength, 3) XCTAssertEqual(compositor.spans[0].maxLength, 3)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;b;c") XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 2) XCTAssertEqual(compositor.spans[1].maxLength, 2)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[2].maxLength, 1) XCTAssertEqual(compositor.spans[2].maxLength, 1)
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "c")
} }
func test07_Compositor_SpanDeletionFromFront() throws { func test07_Compositor_SpanDeletionFromFront() throws {
@ -171,10 +171,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[1].maxLength, 1) XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b")
} }
func test08_Compositor_SpanDeletionFromMiddle() throws { func test08_Compositor_SpanDeletionFromMiddle() throws {
@ -190,10 +190,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1) XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
compositor.clear() compositor.clear()
compositor.insertKey("a") compositor.insertKey("a")
@ -206,10 +206,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1) XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
} }
func test09_Compositor_SpanDeletionFromRear() throws { func test09_Compositor_SpanDeletionFromRear() throws {
@ -226,10 +226,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2) XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2) XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2) XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1) XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
} }
func test10_Compositor_SpanInsertion() throws { func test10_Compositor_SpanInsertion() throws {
@ -245,19 +245,19 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 4) XCTAssertEqual(compositor.length, 4)
XCTAssertEqual(compositor.spans.count, 4) XCTAssertEqual(compositor.spans.count, 4)
XCTAssertEqual(compositor.spans[0].maxLength, 4) XCTAssertEqual(compositor.spans[0].maxLength, 4)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a") XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;X") XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;X")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;X;b") XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;X;b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "a;X;b;c") XCTAssertEqual(compositor.spans[0][4]?.keyArray.joined(separator: compositor.separator), "a;X;b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 3) XCTAssertEqual(compositor.spans[1].maxLength, 3)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "X") XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "X")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "X;b") XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "X;b")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "X;b;c") XCTAssertEqual(compositor.spans[1][3]?.keyArray.joined(separator: compositor.separator), "X;b;c")
XCTAssertEqual(compositor.spans[2].maxLength, 2) XCTAssertEqual(compositor.spans[2].maxLength, 2)
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c") XCTAssertEqual(compositor.spans[2][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[3].maxLength, 1) XCTAssertEqual(compositor.spans[3].maxLength, 1)
XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[3][1]?.keyArray.joined(separator: compositor.separator), "c")
} }
func test11_Compositor_LongGridDeletion() throws { func test11_Compositor_LongGridDeletion() throws {
@ -282,17 +282,17 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.cursor, 6) XCTAssertEqual(compositor.cursor, 6)
XCTAssertEqual(compositor.length, 13) XCTAssertEqual(compositor.length, 13)
XCTAssertEqual(compositor.spans.count, 13) XCTAssertEqual(compositor.spans.count, 13)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef") XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefh") XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefh")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "bcdef") XCTAssertEqual(compositor.spans[1][5]?.keyArray.joined(separator: compositor.separator), "bcdef")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefhi") XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefhi")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "cdefh") XCTAssertEqual(compositor.spans[2][5]?.keyArray.joined(separator: compositor.separator), "cdefh")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defhij") XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defhij")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efhijk") XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efhijk")
XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fhijkl") XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fhijkl")
XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm") XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "hijklm")
XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "ijklmn") XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "ijklmn")
XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn") XCTAssertEqual(compositor.spans[8][5]?.keyArray.joined(separator: compositor.separator), "jklmn")
} }
func test12_Compositor_LongGridInsertion() throws { func test12_Compositor_LongGridInsertion() throws {
@ -317,25 +317,25 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.cursor, 8) XCTAssertEqual(compositor.cursor, 8)
XCTAssertEqual(compositor.length, 15) XCTAssertEqual(compositor.length, 15)
XCTAssertEqual(compositor.spans.count, 15) XCTAssertEqual(compositor.spans.count, 15)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef") XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefg") XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefg")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefgX") XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefgX")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defgXh") XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defgXh")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "defgX") XCTAssertEqual(compositor.spans[3][5]?.keyArray.joined(separator: compositor.separator), "defgX")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efgXhi") XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efgXhi")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "efgXh") XCTAssertEqual(compositor.spans[4][5]?.keyArray.joined(separator: compositor.separator), "efgXh")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "efgX") XCTAssertEqual(compositor.spans[4][4]?.keyArray.joined(separator: compositor.separator), "efgX")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "efg") XCTAssertEqual(compositor.spans[4][3]?.keyArray.joined(separator: compositor.separator), "efg")
XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fgXhij") XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fgXhij")
XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "gXhijk") XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "gXhijk")
XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "Xhijkl") XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "Xhijkl")
XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm") XCTAssertEqual(compositor.spans[8][6]?.keyArray.joined(separator: compositor.separator), "hijklm")
} }
func test13_Compositor_StressBench() throws { func test13_Compositor_StressBench() throws {
NSLog("// Stress test preparation begins.") NSLog("// Stress test preparation begins.")
var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData))
for _ in 0 ..< 1919 { (0 ..< 1919).forEach { _ in
compositor.insertKey("yi") compositor.insertKey("yi")
} }
NSLog("// Stress test started.") NSLog("// Stress test started.")
@ -348,8 +348,8 @@ final class MegrezTests: XCTestCase {
func test14_Compositor_WordSegmentation() throws { func test14_Compositor_WordSegmentation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true)) var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true))
compositor.separator = "" compositor.separator = ""
for i in "高科技公司的年終獎金" { "高科技公司的年終獎金".forEach { i in
compositor.insertKey(String(i)) compositor.insertKey(i.description)
} }
let result = compositor.walk().0 let result = compositor.walk().0
XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"]) XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"])
@ -546,4 +546,17 @@ final class MegrezTests: XCTestCase {
print(newResult2) print(newResult2)
XCTAssertEqual(newResult2, ["", ""]) XCTAssertEqual(newResult2, ["", ""])
} }
/// Verifies that a compositor obtained through `hardCopy` walks to the same
/// result as the compositor it was copied from.
func test21_Compositor_hardCopy() throws {
  let readingKeys = "gao1 ke1 ji4 gong1 si1 de5 nian2 zhong1 jiang3 jin1".split(separator: " ")
  var original = Megrez.Compositor(with: SimpleLM(input: strSampleData))
  // Feed every reading into the original before the copy is taken.
  for reading in readingKeys {
    original.insertKey(String(reading))
  }
  var duplicate = original.hardCopy
  // Walk each compositor separately, then compare the walked nodes.
  let walkedFromOriginal = original.walk().walkedNodes
  let walkedFromDuplicate = duplicate.walk().walkedNodes
  XCTAssertEqual(walkedFromOriginal, walkedFromDuplicate)
}
} }