Megrez // Compositor refactoring with hard copy support.

This commit is contained in:
ShikiSuen 2023-03-09 22:28:06 +08:00
parent 22ead33ba9
commit 490a646f88
9 changed files with 291 additions and 243 deletions

View File

@ -66,6 +66,26 @@ public extension Megrez {
self.separator = separator
}
/// Creates a compositor that duplicates the state of `target`.
///
/// `cursor`, `marker`, `separator`, `keys` and `langModel` are assigned as-is.
/// Every walked node is rebuilt through `copy`, and every span through `hardCopy`.
/// - Remark: NOTE(review): presumably needed because `Node` is a reference type, so
///   a plain assignment would leave both compositors sharing the same node
///   instances — confirm against the upstream remark.
/// - Parameter target: The compositor to duplicate.
public init(from target: Compositor) {
  self.cursor = target.cursor
  self.marker = target.marker
  self.separator = target.separator
  // Walked nodes are duplicated one by one instead of being assigned.
  self.walkedNodes = target.walkedNodes.map { $0.copy }
  self.keys = target.keys
  // Spans are duplicated via their own `hardCopy`.
  self.spans = target.spans.map { $0.hardCopy }
  self.langModel = target.langModel
}
/// A duplicate of this compositor, built by passing `self` to `init(from:)`.
/// - Remark: NOTE(review): presumably exists because `Node` is a reference type and
///   an ordinary value copy of the compositor would keep sharing its nodes — confirm.
public var hardCopy: Compositor {
  Compositor(from: self)
}
///
///
///
@ -167,21 +187,19 @@ public extension Megrez {
public var dumpDOT: String {
// NSMutableString is used here as the Swift counterpart of C#'s StringBuilder.
let strOutput: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n")
for (p, span) in spans.enumerated() {
for ni in 0 ... (span.maxLength) {
guard let np = span.nodeOf(length: ni) else { continue }
if p == 0 {
strOutput.append("BOS -> \(np.value);\n")
}
spans.enumerated().forEach { p, span in
(0 ... span.maxLength).forEach { ni in
guard let np = span[ni] else { return }
if p == 0 { strOutput.append("BOS -> \(np.value);\n") }
strOutput.append("\(np.value);\n")
if (p + ni) < spans.count {
let destinationSpan = spans[p + ni]
for q in 0 ... (destinationSpan.maxLength) {
guard let dn = destinationSpan.nodeOf(length: q) else { continue }
(0 ... destinationSpan.maxLength).forEach { q in
guard let dn = destinationSpan[q] else { return }
strOutput.append(np.value + " -> " + dn.value + ";\n")
}
}
guard (p + ni) == spans.count else { continue }
guard (p + ni) == spans.count else { return }
strOutput.append(np.value + " -> EOS;\n")
}
}
@ -198,11 +216,11 @@ extension Megrez.Compositor {
/// - Parameters:
/// - location:
/// - action:
mutating func resizeGrid(at location: Int, do action: ResizeBehavior) {
private mutating func resizeGrid(at location: Int, do action: ResizeBehavior) {
let location = max(min(location, spans.count), 0) //
switch action {
case .expand:
spans.insert(SpanUnit(), at: location)
spans.insert(.init(), at: location)
if [0, spans.count].contains(location) { return }
case .shrink:
if spans.count == location { return }
@ -248,60 +266,54 @@ extension Megrez.Compositor {
let affectedLength = Megrez.Compositor.maxSpanLength - 1
let begin = max(0, location - affectedLength)
guard location >= begin else { return }
for i in begin ..< location {
spans[i].dropNodesOfOrBeyond(length: location - i + 1)
(begin ..< location).forEach { delta in
((location - delta + 1) ... Self.maxSpanLength).forEach { theLength in
spans[delta][theLength] = nil
}
}
}
/// Collects the keys inside the given range as an array of strings.
/// - Parameter range: Half-open range of indices into `keys`.
/// - Returns: The `description` of each key in `keys[range]`, or an empty array
///   when the range reaches below 0 or beyond `keys.count`.
func getJoinedKeyArray(range: Range<Int>) -> [String] {
private func getJoinedKeyArray(range: Range<Int>) -> [String] {
// The bounds are checked by hand before slicing.
// NOTE(review): the original remark appears to say that a `contains`-based check
// would require macOS 13 Ventura — confirm against the upstream comment.
guard range.upperBound <= keys.count, range.lowerBound >= 0 else { return [] }
return keys[range].map(\.description)
}
/// Looks up the node of a given length at a given span position and checks its keys.
/// - Parameters:
///   - location: Span position; clamped before use (see below).
///   - length: Length of the node to look up through `nodeOf(length:)`.
///   - keyArray: Key array the found node is compared against.
/// - Returns: The node when one exists at that position and length and its
///   `keyArray` equals the given one; otherwise nil.
func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? {
let location = max(min(location, spans.count - 1), 0) // Clamp into 0 ... spans.count - 1 (0 when spans is empty).
guard let node = spans[location].nodeOf(length: length) else { return nil }
return keyArray == node.keyArray ? node : nil
}
///
/// - Parameter updateExisting:
///
/// - Returns: 0
@discardableResult public mutating func update(updateExisting: Bool = false) -> Int {
let maxSpanLength = Megrez.Compositor.maxSpanLength
let range = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count)
let rangeOfPositions = max(0, cursor - maxSpanLength) ..< min(cursor + maxSpanLength, keys.count)
var nodesChanged = 0
for position in range {
for theLength in 1 ... min(maxSpanLength, range.upperBound - position) {
let joinedKeyArray = getJoinedKeyArray(range: position ..< (position + theLength))
if let theNode = getNode(at: position, length: theLength, keyArray: joinedKeyArray) {
if !updateExisting { continue }
rangeOfPositions.forEach { position in
let rangeOfLengths = 1 ... min(maxSpanLength, rangeOfPositions.upperBound - position)
rangeOfLengths.forEach { theLength in
guard position + theLength <= keys.count, position >= 0 else { return }
let joinedKeyArray = keys[position ..< (position + theLength)].map(\.description)
if let theNode = spans[position][theLength] {
if !updateExisting { return }
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
//
if unigrams.isEmpty {
if theNode.keyArray.count == 1 { continue }
spans[position].nullify(node: theNode)
if theNode.keyArray.count == 1 { return }
spans[position][theNode.spanLength] = nil
} else {
theNode.syncingUnigrams(from: unigrams)
}
nodesChanged += 1
continue
return
}
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
guard !unigrams.isEmpty else { continue }
spans[position].append(
node: .init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams)
guard !unigrams.isEmpty else { return }
// SpanUnit.addNode
spans[position][theLength] = .init(
keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams
)
nodesChanged += 1
}

View File

@ -13,37 +13,34 @@ public extension Megrez.Compositor {
/// `G = (V, E)` `O(|V|+|E|)` `G`
/// 使
/// - Returns:
@discardableResult mutating func walk() -> (walkedNode: [Node], succeeded: Bool) {
var result = [Node]()
@discardableResult mutating func walk() -> (walkedNodes: [Megrez.Node], succeeded: Bool) {
var result = [Megrez.Node]()
defer { walkedNodes = result }
guard !spans.isEmpty else { return (result, true) }
var vertexSpans = [[Vertex]]()
for _ in spans {
spans.forEach { _ in
vertexSpans.append(.init())
}
for (i, span) in spans.enumerated() {
for j in 1 ... max(span.maxLength, 1) {
if let theNode = span.nodeOf(length: j) {
vertexSpans[i].append(.init(node: theNode))
}
spans.enumerated().forEach { i, span in
(1 ... max(span.maxLength, 1)).forEach { j in
guard let theNode = span[j] else { return }
vertexSpans[i].append(.init(node: theNode))
}
}
let terminal = Vertex(node: .init(keyArray: ["_TERMINAL_"]))
var root = Vertex(node: .init(keyArray: ["_ROOT_"]))
for (i, vertexSpan) in vertexSpans.enumerated() {
for vertex in vertexSpan {
vertexSpans.enumerated().forEach { i, vertexSpan in
vertexSpan.forEach { vertex in
let nextVertexPosition = i + vertex.node.spanLength
if nextVertexPosition == vertexSpans.count {
vertex.edges.append(terminal)
continue
}
for nextVertex in vertexSpans[nextVertexPosition] {
vertex.edges.append(nextVertex)
return
}
vertexSpans[nextVertexPosition].forEach { vertex.edges.append($0) }
}
}
@ -51,15 +48,13 @@ public extension Megrez.Compositor {
root.edges.append(contentsOf: vertexSpans[0])
var ordered = topologicalSort(root: &root)
for (j, neta) in ordered.reversed().enumerated() {
for (k, _) in neta.edges.enumerated() {
relax(u: neta, v: &neta.edges[k])
}
ordered.reversed().enumerated().forEach { j, neta in
neta.edges.indices.forEach { relax(u: neta, v: &neta.edges[$0]) }
ordered[j] = neta
}
var iterated = terminal
var walked = [Node]()
var walked = [Megrez.Node]()
var totalLengthOfKeys = 0
while let itPrev = iterated.prev {

View File

@ -5,7 +5,7 @@
import Foundation
public extension Megrez.Compositor {
public extension Megrez {
///
struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
///
@ -18,6 +18,8 @@ public extension Megrez.Compositor {
public var isValid: Bool { !keyArray.joined().isEmpty && !value.isEmpty }
/// ()
public var toNGramKey: String { !isValid ? "()" : "(" + joinedKey() + "," + value + ")" }
/// This pair expressed as a labeled tuple of its key array and its value.
public var tupletExpression: (keyArray: [String], value: String) {
  (keyArray: keyArray, value: value)
}
///
/// - Parameters:
@ -28,6 +30,13 @@ public extension Megrez.Compositor {
self.value = value.isEmpty ? "N/A" : value
}
/// Creates a pair from a labeled tuple.
///
/// An empty key array is replaced by `["N/A"]`, and an empty value by `"N/A"`.
/// - Parameter tupletExpression: Tuple holding the key array and the value.
public init(_ tupletExpression: (keyArray: [String], value: String)) {
  let (givenKeys, givenValue) = tupletExpression
  if givenKeys.isEmpty {
    keyArray = ["N/A"]
  } else {
    keyArray = givenKeys
  }
  if givenValue.isEmpty {
    value = "N/A"
  } else {
    value = givenValue
  }
}
///
/// - Parameters:
/// - key:
@ -72,7 +81,9 @@ public extension Megrez.Compositor {
|| (lhs.keyArray.count == rhs.keyArray.count && lhs.value >= rhs.value)
}
}
}
public extension Megrez.Compositor {
///
/// - all: 穿
/// - beginAt:
@ -84,8 +95,8 @@ public extension Megrez.Compositor {
/// location - 1
/// - Parameter location:
/// - Returns:
func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [KeyValuePaired] {
var result = [KeyValuePaired]()
func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Megrez.KeyValuePaired] {
var result = [Megrez.KeyValuePaired]()
guard !keys.isEmpty else { return result }
let location = max(min(location, keys.count - 1), 0) //
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
@ -93,17 +104,16 @@ public extension Megrez.Compositor {
$0.spanLength > $1.spanLength
}
let keyAtCursor = keys[location]
for theNode in anchors.map(\.node) {
if theNode.keyArray.isEmpty { continue }
for gram in theNode.unigrams {
anchors.map(\.node).filter(\.keyArray.isEmpty.negative).forEach { theNode in
theNode.unigrams.forEach { gram in
switch filter {
case .all:
//
if !theNode.keyArray.contains(keyAtCursor) { continue }
//
if !theNode.keyArray.contains(keyAtCursor) { return }
case .beginAt:
if theNode.keyArray[0] != keyAtCursor { continue }
if theNode.keyArray[0] != keyAtCursor { return }
case .endAt:
if theNode.keyArray.reversed()[0] != keyAtCursor { continue }
if theNode.keyArray.reversed()[0] != keyAtCursor { return }
}
result.append(.init(keyArray: theNode.keyArray, value: gram.value))
}
@ -120,7 +130,7 @@ public extension Megrez.Compositor {
/// - overrideType:
/// - Returns:
@discardableResult func overrideCandidate(
_ candidate: KeyValuePaired, at location: Int, overrideType: Node.OverrideType = .withHighScore
_ candidate: Megrez.KeyValuePaired, at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore
)
-> Bool
{
@ -137,7 +147,7 @@ public extension Megrez.Compositor {
/// - Returns:
@discardableResult func overrideCandidateLiteral(
_ candidate: String,
at location: Int, overrideType: Node.OverrideType = .withHighScore
at location: Int, overrideType: Megrez.Node.OverrideType = .withHighScore
) -> Bool {
overrideCandidateAgainst(keyArray: nil, at: location, value: candidate, type: overrideType)
}
@ -151,7 +161,7 @@ public extension Megrez.Compositor {
/// - value:
/// - type:
/// - Returns:
internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Node.OverrideType)
internal func overrideCandidateAgainst(keyArray: [String]?, at location: Int, value: String, type: Megrez.Node.OverrideType)
-> Bool
{
let location = max(min(location, keys.count), 0) //
@ -166,18 +176,18 @@ public extension Megrez.Compositor {
guard let overridden = overridden else { return false } //
for i in overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength) {
(overridden.spanIndex ..< min(spans.count, overridden.spanIndex + overridden.node.spanLength)).forEach { i in
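/// Nodes overlapping the overridden node at each covered position are revisited here:
/// the overridden node itself is skipped; a node whose keys or value are not contained
/// in the overridden node's is reset; any remaining node has its overriding score quartered.
/// NOTE(review): the original "A BC" / "DEF" walkthrough was garbled in this listing — restore it from upstream.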
arrOverlappedNodes = fetchOverlappingNodes(at: i)
for anchor in arrOverlappedNodes {
if anchor.node == overridden.node { continue }
arrOverlappedNodes.forEach { anchor in
if anchor.node == overridden.node { return }
if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t"))
|| !overridden.node.value.contains(anchor.node.value)
{
anchor.node.reset()
continue
return
}
anchor.node.overridingScore /= 4
}
@ -208,3 +218,9 @@ private extension Sequence {
.map(\.element)
}
}
// MARK: - Bool Extension (Private)
extension Bool {
  /// The logical opposite of this value (same result as `!self`).
  /// Spelled as a property so it can be chained in key paths, e.g. `\.isEmpty.negative`.
  var negative: Bool { self == false }
}

View File

@ -3,90 +3,84 @@
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor {
///
public class SpanUnit {
/// nil
public var nodes: [Int: Node] = [:]
///
///
public var maxLength: Int { nodes.keys.max() ?? 0 }
public extension Megrez {
/// A span unit: a dictionary of nodes keyed by an `Int`.
/// NOTE(review): the key appears to be the node's span length, since `addNode`
/// stores a node under `node.spanLength` — confirm.
typealias SpanUnit = [Int: Node]
}
/// Megrez.Compositor.maxSpanLength
private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }
///
private var allowedLengths: ClosedRange<Int> { 1 ... maxSpanLength }
///
public init() {
clear()
}
/// 0
public func clear() {
nodes.removeAll()
}
///
/// - Parameter node:
/// - Returns:
@discardableResult public func append(node: Node) -> Bool {
guard allowedLengths.contains(node.spanLength) else { return false }
nodes[node.spanLength] = node
return true
}
///
/// - Remark: Swift C#
///
///
/// - Parameter node:
public func nullify(node givenNode: Node) {
let spanLength = givenNode.spanLength
nodes[spanLength] = nil
}
///
/// - Parameter length:
/// - Returns:
@discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool {
guard allowedLengths.contains(length) else { return false }
let length = min(length, maxSpanLength)
(length ... maxSpanLength).forEach { nodes[$0] = nil }
return true
}
///
/// - Parameter length:
/// - Returns:
public func nodeOf(length: Int) -> Node? {
guard allowedLengths.contains(length) else { return nil }
return nodes[length]
public extension Megrez.SpanUnit {
  // MARK: - Hard Copy

  /// Creates a span unit holding a `copy` of every node found in `target`,
  /// stored under the same key.
  /// - Remark: NOTE(review): presumably needed because `Node` is a reference type,
  ///   so a plain dictionary copy would still share its nodes — confirm.
  /// - Parameter target: The span unit to duplicate.
  init(SpanUnit target: Megrez.SpanUnit) {
    self.init()
    for (spanLength, node) in target {
      self[spanLength] = node.copy
    }
  }

  /// A duplicate of this span unit whose nodes are copies rather than shared instances.
  var hardCopy: Megrez.SpanUnit {
    Megrez.SpanUnit(SpanUnit: self)
  }

  // MARK: - Dynamic Variables

  /// The largest key currently stored, or 0 when the span unit is empty.
  var maxLength: Int {
    guard let longest = keys.max() else { return 0 }
    return longest
  }

  /// Shorthand for `Megrez.Compositor.maxSpanLength`.
  private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }

  /// The span lengths accepted by this span unit: `1 ... maxSpanLength`.
  private var allowedLengths: ClosedRange<Int> { 1 ... maxSpanLength }

  // MARK: - Functions

  /// Stores a node under its own span length, replacing any node already there.
  /// - Parameter node: The node to store.
  /// - Returns: `false` when the node's span length is outside the allowed
  ///   lengths (nothing is stored); `true` otherwise.
  @discardableResult mutating func addNode(node: Megrez.Node) -> Bool {
    let slot = node.spanLength
    if !allowedLengths.contains(slot) { return false }
    updateValue(node, forKey: slot)
    return true
  }

  /// Removes every node whose span length is `length` or greater, up to the
  /// maximum allowed length.
  /// - Parameter length: The smallest span length to remove.
  /// - Returns: `false` when `length` is outside the allowed lengths (nothing is
  ///   removed); `true` otherwise.
  @discardableResult mutating func dropNodesOfOrBeyond(length: Int) -> Bool {
    let permitted = allowedLengths
    guard permitted.contains(length) else { return false }
    // `length` is already known to be within bounds, so no further clamping is needed.
    for doomed in length ... permitted.upperBound {
      removeValue(forKey: doomed)
    }
    return true
  }
}
// MARK: - Related Compositor Implementations.
extension Megrez.Compositor {
///
/// - Parameter location:
/// - Returns:
internal func fetchOverlappingNodes(at location: Int) -> [NodeAnchor] {
func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] {
var results = [NodeAnchor]()
guard !spans.isEmpty, location < spans.count else { return results }
guard !spans.isEmpty, givenLocation < spans.count else { return results }
//
for theLocation in 1 ... spans[location].maxLength {
guard let node = spans[location].nodeOf(length: theLocation) else { continue }
results.append(.init(node: node, spanIndex: location))
(1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in
guard let node = spans[givenLocation][theSpanLength] else { return }
results.append(.init(node: node, spanIndex: givenLocation))
}
//
let begin: Int = location - min(location, Megrez.Compositor.maxSpanLength - 1)
for theLocation in begin ..< location {
let (A, B): (Int, Int) = (location - theLocation + 1, spans[theLocation].maxLength)
guard A <= B else { continue }
for theLength in A ... B {
guard let node = spans[theLocation].nodeOf(length: theLength) else { continue }
let begin: Int = givenLocation - min(givenLocation, Megrez.Compositor.maxSpanLength - 1)
(begin ..< givenLocation).forEach { theLocation in
let (A, B): (Int, Int) = (givenLocation - theLocation + 1, spans[theLocation].maxLength)
guard A <= B else { return }
(A ... B).forEach { theLength in
guard let node = spans[theLocation][theLength] else { return }
results.append(.init(node: node, spanIndex: theLocation))
}
}

View File

@ -20,13 +20,13 @@ extension Megrez.Compositor {
///
public var topologicallySorted = false
///
public var node: Node
public var node: Megrez.Node
///
///
///
/// - Parameter node:
public init(node: Node) {
public init(node: Megrez.Node) {
self.node = node
}
@ -65,7 +65,7 @@ extension Megrez.Compositor {
///
/// ```
/// func topologicalSort(vertex: Vertex) {
/// for vertexNode in vertex.edges {
/// vertex.edges.forEach {vertexNode in
/// if !vertexNode.topologicallySorted {
/// dfs(vertexNode, result)
/// vertexNode.topologicallySorted = true

View File

@ -5,7 +5,7 @@
import Foundation
public extension Megrez.Compositor {
public extension Megrez {
///
///
///
@ -38,8 +38,6 @@ public extension Megrez.Compositor {
/// c
public var overridingScore: Double = 114_514
// public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }
///
public private(set) var keyArray: [String]
///
@ -54,21 +52,22 @@ public extension Megrez.Compositor {
}
///
public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) }
public var currentPair: Megrez.KeyValuePaired { .init(keyArray: keyArray, value: value) }
///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) {
hasher.combine(overridingScore)
hasher.combine(keyArray)
hasher.combine(spanLength)
hasher.combine(unigrams)
hasher.combine(currentUnigramIndex)
hasher.combine(spanLength)
hasher.combine(currentOverrideType)
hasher.combine(currentUnigramIndex)
}
public static func == (lhs: Node, rhs: Node) -> Bool {
lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength
lhs.overridingScore == rhs.overridingScore && lhs.spanLength == rhs.spanLength
&& lhs.keyArray == rhs.keyArray && lhs.currentUnigramIndex == rhs.currentUnigramIndex
&& lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType
}
@ -90,6 +89,25 @@ public extension Megrez.Compositor {
currentOverrideType = .withNoOverrides
}
/// Creates a new node carrying the same state as the given node.
///
/// Copied members: `overridingScore`, `keyArray`, `spanLength`, `unigrams`,
/// `currentOverrideType` and `currentUnigramIndex`.
/// - Remark: NOTE(review): presumably provided so that a duplicated compositor
///   does not keep sharing node instances with the original — confirm.
/// - Parameter node: The node to duplicate.
public init(node: Node) {
  let source = node
  self.overridingScore = source.overridingScore
  self.keyArray = source.keyArray
  self.spanLength = source.spanLength
  self.unigrams = source.unigrams
  self.currentOverrideType = source.currentOverrideType
  self.currentUnigramIndex = source.currentUnigramIndex
}
/// A new node built from this one through `init(node:)`.
/// - Remark: NOTE(review): presumably used when hard-copying a compositor so the
///   duplicate owns its own node instances — confirm.
public var copy: Node {
  Node(node: self)
}
///
public var isReadingMismatched: Bool { keyArray.count != value.count }
///
@ -162,7 +180,7 @@ public extension Megrez.Compositor {
/// Gramambular 2 NodeInSpan
struct NodeAnchor: Hashable {
///
let node: Megrez.Compositor.Node
let node: Megrez.Node
///
let spanIndex: Int
///
@ -185,7 +203,7 @@ public extension Megrez.Compositor {
// MARK: - Array Extensions.
public extension Array where Element == Megrez.Compositor.Node {
public extension Array where Element == Megrez.Node {
///
var values: [String] { map(\.value) }
@ -204,7 +222,7 @@ public extension Array where Element == Megrez.Compositor.Node {
var resultA = [Int: Int]()
var resultB: [Int: Int] = [-1: 0] //
var cursorCounter = 0
for (nodeCounter, neta) in enumerated() {
enumerated().forEach { nodeCounter, neta in
resultA[nodeCounter] = cursorCounter
neta.keyArray.forEach { _ in
resultB[cursorCounter] = nodeCounter
@ -243,7 +261,7 @@ public extension Array where Element == Megrez.Compositor.Node {
/// - cursor:
/// - outCursorPastNode:
/// - Returns:
func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Node? {
guard !isEmpty else { return nil }
let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) //
let range = contextRange(ofGivenCursor: cursor)
@ -255,7 +273,7 @@ public extension Array where Element == Megrez.Compositor.Node {
///
/// - Parameter cursor:
/// - Returns:
func findNode(at cursor: Int) -> Megrez.Compositor.Node? {
func findNode(at cursor: Int) -> Megrez.Node? {
var useless = 0
return findNode(at: cursor, target: &useless)
}

View File

@ -48,8 +48,8 @@ public extension Array where Element == Megrez.Unigram {
mutating func consolidate(filter theFilter: Set<String> = .init()) {
var inserted: [String: Double] = [:]
var insertedArray: [Megrez.Unigram] = []
for neta in filter({ !theFilter.contains($0.value) }) {
if inserted.keys.contains(neta.value) { continue }
filter { !theFilter.contains($0.value) }.forEach { neta in
if inserted.keys.contains(neta.value) { return }
inserted[neta.value] = neta.score
insertedArray.append(neta)
}

View File

@ -11,9 +11,9 @@ class SimpleLM: LangModelProtocol {
var mutDatabase: [String: [Megrez.Unigram]] = [:]
init(input: String, swapKeyValue: Bool = false) {
let sstream = input.components(separatedBy: "\n")
for line in sstream {
sstream.forEach { line in
if line.isEmpty || line.hasPrefix("#") {
continue
return
}
let linestream = line.split(separator: " ")
let col0 = String(linestream[0])

View File

@ -11,43 +11,43 @@ import XCTest
final class MegrezTests: XCTestCase {
// Exercises a span unit: inserting nodes, looking them up by length, clearing,
// dropping nodes at or beyond a length, and rejecting out-of-range lengths.
// NOTE(review): this listing carries both spellings of each changed statement
// (`append`/`nodeOf`/`clear` and `addNode`/subscript/`removeAll`) side by side.
func test01_Span() throws {
let langModel = SimpleLM(input: strSampleData)
let span = Megrez.Compositor.SpanUnit()
let n1 = Megrez.Compositor.Node(
var span = Megrez.SpanUnit()
let n1 = Megrez.Node(
keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"])
)
let n3 = Megrez.Compositor.Node(
let n3 = Megrez.Node(
keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(keyArray: ["gao1ke1ji4"])
)
// An empty span reports a max length of 0; it grows as nodes are inserted.
XCTAssertEqual(span.maxLength, 0)
span.append(node: n1)
span.addNode(node: n1)
XCTAssertEqual(span.maxLength, 1)
span.append(node: n3)
span.addNode(node: n3)
XCTAssertEqual(span.maxLength, 3)
XCTAssertEqual(span.nodeOf(length: 1), n1)
XCTAssertEqual(span.nodeOf(length: 2), nil)
XCTAssertEqual(span.nodeOf(length: 3), n3)
XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil)
span.clear()
XCTAssertEqual(span[1], n1)
XCTAssertEqual(span[2], nil)
XCTAssertEqual(span[3], n3)
XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil)
span.removeAll()
// After clearing, no node remains at any length.
XCTAssertEqual(span.maxLength, 0)
XCTAssertEqual(span.nodeOf(length: 1), nil)
XCTAssertEqual(span.nodeOf(length: 2), nil)
XCTAssertEqual(span.nodeOf(length: 3), nil)
XCTAssertEqual(span.nodeOf(length: Megrez.Compositor.maxSpanLength), nil)
XCTAssertEqual(span[1], nil)
XCTAssertEqual(span[2], nil)
XCTAssertEqual(span[3], nil)
XCTAssertEqual(span[Megrez.Compositor.maxSpanLength], nil)
// Re-insert both nodes, then drop everything of length 2 or more: only n1 stays.
span.append(node: n1)
span.append(node: n3)
span.addNode(node: n1)
span.addNode(node: n3)
span.dropNodesOfOrBeyond(length: 2)
XCTAssertEqual(span.maxLength, 1)
XCTAssertEqual(span.nodeOf(length: 1), n1)
XCTAssertEqual(span.nodeOf(length: 2), nil)
XCTAssertEqual(span.nodeOf(length: 3), nil)
XCTAssertEqual(span[1], n1)
XCTAssertEqual(span[2], nil)
XCTAssertEqual(span[3], nil)
// Dropping from length 1 empties the span.
span.dropNodesOfOrBeyond(length: 1)
XCTAssertEqual(span.maxLength, 0)
XCTAssertEqual(span.nodeOf(length: 1), nil)
let n114514 = Megrez.Compositor.Node(spanLength: 114_514)
XCTAssertFalse(span.append(node: n114514))
XCTAssertNil(span.nodeOf(length: 0))
XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1))
XCTAssertEqual(span[1], nil)
// A node whose span length is far out of range must be rejected.
let n114514 = Megrez.Node(spanLength: 114_514)
XCTAssertFalse(span.addNode(node: n114514))
XCTAssertNil(span[0])
XCTAssertNil(span[Megrez.Compositor.maxSpanLength + 1])
}
func test02_RankedLangModel() throws {
@ -85,7 +85,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 1)
XCTAssertEqual(compositor.spans.count, 1)
XCTAssertEqual(compositor.spans[0].maxLength, 1)
guard let zeroNode = compositor.spans[0].nodeOf(length: 1) else {
guard let zeroNode = compositor.spans[0][1] else {
print("fuckme")
return
}
@ -149,14 +149,14 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 3)
XCTAssertEqual(compositor.spans.count, 3)
XCTAssertEqual(compositor.spans[0].maxLength, 3)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;b;c")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 2)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[2].maxLength, 1)
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "c")
}
func test07_Compositor_SpanDeletionFromFront() throws {
@ -171,10 +171,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;b")
XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "b")
}
func test08_Compositor_SpanDeletionFromMiddle() throws {
@ -190,10 +190,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
compositor.clear()
compositor.insertKey("a")
@ -206,10 +206,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
}
func test09_Compositor_SpanDeletionFromRear() throws {
@ -226,10 +226,10 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 2)
XCTAssertEqual(compositor.spans.count, 2)
XCTAssertEqual(compositor.spans[0].maxLength, 2)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 1)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "c")
}
func test10_Compositor_SpanInsertion() throws {
@ -245,19 +245,19 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 4)
XCTAssertEqual(compositor.spans.count, 4)
XCTAssertEqual(compositor.spans[0].maxLength, 4)
XCTAssertEqual(compositor.spans[0].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "a;X")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "a;X;b")
XCTAssertEqual(compositor.spans[0].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "a;X;b;c")
XCTAssertEqual(compositor.spans[0][1]?.keyArray.joined(separator: compositor.separator), "a")
XCTAssertEqual(compositor.spans[0][2]?.keyArray.joined(separator: compositor.separator), "a;X")
XCTAssertEqual(compositor.spans[0][3]?.keyArray.joined(separator: compositor.separator), "a;X;b")
XCTAssertEqual(compositor.spans[0][4]?.keyArray.joined(separator: compositor.separator), "a;X;b;c")
XCTAssertEqual(compositor.spans[1].maxLength, 3)
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "X")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "X;b")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "X;b;c")
XCTAssertEqual(compositor.spans[1][1]?.keyArray.joined(separator: compositor.separator), "X")
XCTAssertEqual(compositor.spans[1][2]?.keyArray.joined(separator: compositor.separator), "X;b")
XCTAssertEqual(compositor.spans[1][3]?.keyArray.joined(separator: compositor.separator), "X;b;c")
XCTAssertEqual(compositor.spans[2].maxLength, 2)
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 2)?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[2][1]?.keyArray.joined(separator: compositor.separator), "b")
XCTAssertEqual(compositor.spans[2][2]?.keyArray.joined(separator: compositor.separator), "b;c")
XCTAssertEqual(compositor.spans[3].maxLength, 1)
XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
XCTAssertEqual(compositor.spans[3][1]?.keyArray.joined(separator: compositor.separator), "c")
}
/// Long-grid deletion test (only partly visible here).
///
/// The visible assertions expect the cursor at 6, a length of 13 and 13 spans,
/// then check the joined key arrays of selected nodes, each addressed by a span
/// index and a node length.
// NOTE(review): this listing is a rendered diff. The setup between the signature
// and the first assertion is elided behind the hunk header, and every node check
// appears twice: once through `nodeOf(length:)` and once through the subscript
// `spans[i][length]`. Presumably the subscript lines replace the `nodeOf` lines;
// confirm against the commit.
func test11_Compositor_LongGridDeletion() throws {
@ -282,17 +282,17 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.cursor, 6)
XCTAssertEqual(compositor.length, 13)
XCTAssertEqual(compositor.spans.count, 13)
// Node checks written with `nodeOf(length:)`.
XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefh")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "bcdef")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefhi")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "cdefh")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defhij")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efhijk")
XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fhijkl")
XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm")
XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "ijklmn")
XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn")
// The same node checks written with the span subscript.
XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefh")
XCTAssertEqual(compositor.spans[1][5]?.keyArray.joined(separator: compositor.separator), "bcdef")
XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefhi")
XCTAssertEqual(compositor.spans[2][5]?.keyArray.joined(separator: compositor.separator), "cdefh")
XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defhij")
XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efhijk")
XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fhijkl")
XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "hijklm")
XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "ijklmn")
XCTAssertEqual(compositor.spans[8][5]?.keyArray.joined(separator: compositor.separator), "jklmn")
}
/// Long-grid insertion test (only partly visible here).
///
/// The visible assertions expect the cursor at 8, a length of 15 and 15 spans,
/// then check the joined key arrays of selected nodes, each addressed by a span
/// index and a node length. The expected strings contain an `X` key among the
/// alphabetical ones.
// NOTE(review): this listing is a rendered diff. The setup between the signature
// and the first assertion is elided behind the hunk header, and every node check
// appears twice: once through `nodeOf(length:)` and once through the subscript
// `spans[i][length]`. Presumably the subscript lines replace the `nodeOf` lines;
// confirm against the commit.
func test12_Compositor_LongGridInsertion() throws {
@ -317,25 +317,25 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.cursor, 8)
XCTAssertEqual(compositor.length, 15)
XCTAssertEqual(compositor.spans.count, 15)
// Node checks written with `nodeOf(length:)`.
XCTAssertEqual(compositor.spans[0].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "bcdefg")
XCTAssertEqual(compositor.spans[2].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "cdefgX")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "defgXh")
XCTAssertEqual(compositor.spans[3].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "defgX")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "efgXhi")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "efgXh")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 4)?.keyArray.joined(separator: compositor.separator), "efgX")
XCTAssertEqual(compositor.spans[4].nodeOf(length: 3)?.keyArray.joined(separator: compositor.separator), "efg")
XCTAssertEqual(compositor.spans[5].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "fgXhij")
XCTAssertEqual(compositor.spans[6].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "gXhijk")
XCTAssertEqual(compositor.spans[7].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "Xhijkl")
XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm")
// The same node checks written with the span subscript.
XCTAssertEqual(compositor.spans[0][6]?.keyArray.joined(separator: compositor.separator), "abcdef")
XCTAssertEqual(compositor.spans[1][6]?.keyArray.joined(separator: compositor.separator), "bcdefg")
XCTAssertEqual(compositor.spans[2][6]?.keyArray.joined(separator: compositor.separator), "cdefgX")
XCTAssertEqual(compositor.spans[3][6]?.keyArray.joined(separator: compositor.separator), "defgXh")
XCTAssertEqual(compositor.spans[3][5]?.keyArray.joined(separator: compositor.separator), "defgX")
XCTAssertEqual(compositor.spans[4][6]?.keyArray.joined(separator: compositor.separator), "efgXhi")
XCTAssertEqual(compositor.spans[4][5]?.keyArray.joined(separator: compositor.separator), "efgXh")
XCTAssertEqual(compositor.spans[4][4]?.keyArray.joined(separator: compositor.separator), "efgX")
XCTAssertEqual(compositor.spans[4][3]?.keyArray.joined(separator: compositor.separator), "efg")
XCTAssertEqual(compositor.spans[5][6]?.keyArray.joined(separator: compositor.separator), "fgXhij")
XCTAssertEqual(compositor.spans[6][6]?.keyArray.joined(separator: compositor.separator), "gXhijk")
XCTAssertEqual(compositor.spans[7][6]?.keyArray.joined(separator: compositor.separator), "Xhijkl")
XCTAssertEqual(compositor.spans[8][6]?.keyArray.joined(separator: compositor.separator), "hijklm")
}
/// Stress benchmark (only the preparation phase is visible here).
///
/// Builds a compositor over `SimpleLM(input: strStressData)` and inserts the
/// key "yi" 1919 times, logging before the preparation and again once it is done.
// NOTE(review): this listing is a rendered diff. Both the `for … in` opener and
// the `forEach` opener are shown for the same loop, so the braces do not balance
// as printed, and the rest of the function is elided after the second log line.
// Presumably the `forEach` line replaces the `for` line; confirm against the commit.
func test13_Compositor_StressBench() throws {
NSLog("// Stress test preparation begins.")
var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData))
for _ in 0 ..< 1919 {
(0 ..< 1919).forEach { _ in
compositor.insertKey("yi")
}
NSLog("// Stress test started.")
@ -348,8 +348,8 @@ final class MegrezTests: XCTestCase {
/// Word-segmentation test (the end of the function is not visible here).
///
/// Builds a compositor over `SimpleLM(input: strSampleData, swapKeyValue: true)`,
/// sets its separator to the empty string, inserts every character of the sample
/// sentence as its own key, walks the grid, and compares the walked keys (joined
/// with an empty separator) against the expected segmentation.
// NOTE(review): this listing is a rendered diff. The character loop is shown in
// both its `for … in` form and its `forEach` form, and the function is cut off
// by the next hunk header. Presumably the `forEach` lines replace the `for`
// lines; confirm against the commit.
func test14_Compositor_WordSegmentation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true))
compositor.separator = ""
for i in "高科技公司的年終獎金" {
compositor.insertKey(String(i))
"高科技公司的年終獎金".forEach { i in
compositor.insertKey(i.description)
}
// `.0` is the first element of the tuple returned by `walk()`.
let result = compositor.walk().0
XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"])
@ -546,4 +546,17 @@ final class MegrezTests: XCTestCase {
print(newResult2)
XCTAssertEqual(newResult2, ["", ""])
}
/// Verifies that `Compositor.hardCopy` produces a copy that carries the same
/// state as its source and walks to the same result.
///
/// The reading string is split on spaces and every reading is inserted into
/// compositor A as one key. Compositor B is then taken from `hardCopy`, both
/// are walked, and the walked nodes are compared.
///
/// Besides the walk comparison, the test checks that the copy mirrors the
/// source's cursor, length and span count, and that the walk is not empty,
/// since two empty walks would otherwise compare equal and hide a broken fixture.
func test21_Compositor_hardCopy() throws {
  let theLM = SimpleLM(input: strSampleData)
  let rawReadings = "gao1 ke1 ji4 gong1 si1 de5 nian2 zhong1 jiang3 jin1"
  var compositorA = Megrez.Compositor(with: theLM)
  rawReadings.split(separator: " ").forEach { key in
    compositorA.insertKey(key.description)
  }
  var compositorB = compositorA.hardCopy
  // The copy must mirror the source's observable state before either is walked.
  XCTAssertEqual(compositorB.cursor, compositorA.cursor)
  XCTAssertEqual(compositorB.length, compositorA.length)
  XCTAssertEqual(compositorB.spans.count, compositorA.spans.count)
  let resultA = compositorA.walk().walkedNodes
  let resultB = compositorB.walk().walkedNodes
  // Guard against a vacuous pass: an empty walk on both sides would be "equal".
  XCTAssertFalse(resultA.isEmpty)
  XCTAssertEqual(resultA, resultB)
}
}