Megrez // 2.5.0 update, syncing changes from MegrezNT.

This commit is contained in:
ShikiSuen 2022-12-13 19:25:31 +08:00
parent 5bca4abef5
commit 258d2f7362
11 changed files with 271 additions and 218 deletions

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)

View File

@ -1,8 +1,10 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation
extension Megrez { extension Megrez {
/// ///
/// ///
@ -20,9 +22,9 @@ extension Megrez {
public enum ResizeBehavior { case expand, shrink } public enum ResizeBehavior { case expand, shrink }
/// ///
public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } } public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } }
/// - /// -
public static var theSeparator: String = "-" public static var theSeparator: String = "-"
/// ///
public var cursor: Int = 0 { public var cursor: Int = 0 {
didSet { didSet {
cursor = max(0, min(cursor, length)) cursor = max(0, min(cursor, length))
@ -30,30 +32,33 @@ extension Megrez {
} }
} }
/// ///
public var marker: Int = 0 { didSet { marker = max(0, min(marker, length)) } } public var marker: Int = 0 { didSet { marker = max(0, min(marker, length)) } }
/// - /// -
public var separator = theSeparator { public var separator = theSeparator {
didSet { didSet {
Self.theSeparator = separator Self.theSeparator = separator
} }
} }
/// ///
public var width: Int { keys.count }
///
public var walkedNodes: [Node] = [] public var walkedNodes: [Node] = []
/// ///
/// - Remark: spans.count
///
public var length: Int { keys.count } public var length: Int { keys.count }
/// ///
public var isEmpty: Bool { spans.isEmpty && keys.isEmpty } public var isEmpty: Bool { spans.isEmpty && keys.isEmpty }
/// ///
public private(set) var keys = [String]() public private(set) var keys = [String]()
/// ///
public private(set) var spans = [Span]() public private(set) var spans = [SpanUnit]()
/// 使 LangModelRanked /// 使 LangModelRanked
public var langModel: LangModelRanked public var langModel: LangModelRanked {
didSet { clear() }
}
/// 0 /// 0
public private(set) var cursorRegionMap: [Int: Int] = .init() public private(set) var cursorRegionMap: [Int: Int] = .init()
@ -64,8 +69,13 @@ extension Megrez {
self.separator = separator self.separator = separator
} }
///
///
///
///
public mutating func clear() { public mutating func clear() {
cursor = 0 cursor = 0
marker = 0
keys.removeAll() keys.removeAll()
spans.removeAll() spans.removeAll()
walkedNodes.removeAll() walkedNodes.removeAll()
@ -107,9 +117,16 @@ extension Megrez {
} }
/// ///
///
/// RearFront
/// - Parameters: /// - Parameters:
/// - direction: /// - direction:
/// - isMarker: /// - isMarker:
///
///
///
/// // InputState KeyHandler
/// NSStringUtils
/// - Returns: /// - Returns:
@discardableResult public mutating func jumpCursorBySpan(to direction: TypingDirection, isMarker: Bool = false) @discardableResult public mutating func jumpCursorBySpan(to direction: TypingDirection, isMarker: Bool = false)
-> Bool -> Bool
@ -117,7 +134,7 @@ extension Megrez {
var target = isMarker ? marker : cursor var target = isMarker ? marker : cursor
switch direction { switch direction {
case .front: case .front:
if target == width { return false } if target == length { return false }
case .rear: case .rear:
if target == 0 { return false } if target == 0 { return false }
} }
@ -152,27 +169,28 @@ extension Megrez {
/// GraphViz /// GraphViz
public var dumpDOT: String {
  // NSMutableString is used as an append-only buffer while the graph text is assembled.
  let buffer: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n")
  let spanCount = spans.count
  for (spanIndex, currentSpan) in spans.enumerated() {
    // Lengths for which the span holds no node are skipped by the guard below.
    for nodeLength in 0...(currentSpan.maxLength) {
      guard let sourceNode = currentSpan.nodeOf(length: nodeLength) else { continue }
      let sourceValue = sourceNode.value
      // Nodes in the very first span are linked from the begin-of-sentence marker.
      if spanIndex == 0 { buffer.append("BOS -> \(sourceValue);\n") }
      buffer.append("\(sourceValue);\n")
      let nextIndex = spanIndex + nodeLength
      if nextIndex < spanCount {
        // Link this node to every node that starts where this one ends.
        let nextSpan = spans[nextIndex]
        for nextLength in 0...(nextSpan.maxLength) {
          guard let targetNode = nextSpan.nodeOf(length: nextLength) else { continue }
          buffer.append("\(sourceValue) -> \(targetNode.value);\n")
        }
      } else if nextIndex == spanCount {
        // A node that ends exactly at the end of the grid links to the end-of-sentence marker.
        buffer.append("\(sourceValue) -> EOS;\n")
      }
    }
  }
  buffer.append("EOS;\n}\n")
  return buffer.description
}
} }
} }
@ -180,9 +198,7 @@ extension Megrez {
// MARK: - Internal Methods (Maybe Public) // MARK: - Internal Methods (Maybe Public)
extension Megrez.Compositor { extension Megrez.Compositor {
// MARK: Internal methods for maintaining the grid. ///
///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
/// - action: /// - action:
@ -190,7 +206,7 @@ extension Megrez.Compositor {
let location = max(min(location, spans.count), 0) // let location = max(min(location, spans.count), 0) //
switch action { switch action {
case .expand: case .expand:
spans.insert(Span(), at: location) spans.insert(SpanUnit(), at: location)
if [0, spans.count].contains(location) { return } if [0, spans.count].contains(location) { return }
case .shrink: case .shrink:
if spans.count == location { return } if spans.count == location { return }
@ -241,26 +257,31 @@ extension Megrez.Compositor {
} }
} }
@discardableResult mutating func insertNode(_ node: Node, at location: Int) -> Bool { ///
let location = max(min(location, spans.count - 1), 0) // /// - Parameter range:
spans[location].append(node: node) /// - Returns:
return true
}
/// Returns the keys located in the given index range.
/// - Parameter range: Half-open range of indices into `keys`.
/// - Returns: The keys inside `range`, or an empty array when the range
///   reaches outside of `keys`.
func getJoinedKeyArray(range: Range<Int>) -> [String] {
  // NOTE(review): the original comment mentions `contains` and macOS 13 Ventura,
  // presumably explaining why the bounds are compared by hand — confirm.
  let fitsInsideKeys = range.lowerBound >= 0 && range.upperBound <= keys.count
  return fitsInsideKeys ? Array(keys[range]) : []
}
///
/// - Parameters:
/// - location:
/// - length:
/// - keyArray:
/// - Returns: nil
/// Looks up the node of a given span length at a given span position and
/// returns it only if its key array matches the expected one.
/// - Parameters:
///   - location: Index into `spans`; out-of-range values are clamped to the valid range.
///   - length: Span length of the node being looked for.
///   - keyArray: Key array the node must carry to count as a match.
/// - Returns: The matching node, or `nil` when the grid has no spans, when no
///   node of that length exists at the position, or when the keys differ.
func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? {
  // With no spans the clamp below would yield index 0, which is out of bounds.
  guard !spans.isEmpty else { return nil }
  let location = max(min(location, spans.count - 1), 0)
  guard let node = spans[location].nodeOf(length: length) else { return nil }
  return keyArray == node.keyArray ? node : nil
}
/// ///
/// - Returns: 0 /// - Parameter updateExisting:
///
/// - Returns: 0
@discardableResult public mutating func update(updateExisting: Bool = false) -> Int { @discardableResult public mutating func update(updateExisting: Bool = false) -> Int {
let maxSpanLength = Megrez.Compositor.maxSpanLength let maxSpanLength = Megrez.Compositor.maxSpanLength
let range = max(0, cursor - maxSpanLength)..<min(cursor + maxSpanLength, keys.count) let range = max(0, cursor - maxSpanLength)..<min(cursor + maxSpanLength, keys.count)
@ -276,16 +297,15 @@ extension Megrez.Compositor {
if theNode.keyArray.count == 1 { continue } if theNode.keyArray.count == 1 { continue }
spans[position].nodes.removeAll { $0 == theNode } spans[position].nodes.removeAll { $0 == theNode }
} else { } else {
theNode.resetUnigrams(using: unigrams) theNode.syncingUnigrams(from: unigrams)
} }
nodesChanged += 1 nodesChanged += 1
continue continue
} }
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray) let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
guard !unigrams.isEmpty else { continue } guard !unigrams.isEmpty else { continue }
insertNode( spans[position].append(
.init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams), node: .init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams)
at: position
) )
nodesChanged += 1 nodesChanged += 1
} }
@ -293,12 +313,13 @@ extension Megrez.Compositor {
return nodesChanged return nodesChanged
} }
mutating func updateCursorJumpingTables(_ walkedNodes: [Node]) { ///
mutating func updateCursorJumpingTables() {
var cursorRegionMapDict = [Int: Int]() var cursorRegionMapDict = [Int: Int]()
cursorRegionMapDict[-1] = 0 // cursorRegionMapDict[-1] = 0 //
var counter = 0 var counter = 0
for (i, anchor) in walkedNodes.enumerated() { for (i, theNode) in walkedNodes.enumerated() {
for _ in 0..<anchor.spanLength { for _ in 0..<theNode.spanLength {
cursorRegionMapDict[counter] = i cursorRegionMapDict[counter] = i
counter += 1 counter += 1
} }

View File

@ -1,9 +1,11 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor { extension Megrez.Compositor {
/// walkedNodes
///
/// ///
/// 使 Cormen 2001 /// 使 Cormen 2001
/// ///
@ -11,23 +13,23 @@ extension Megrez.Compositor {
/// `G = (V, E)` `O(|V|+|E|)` `G` /// `G = (V, E)` `O(|V|+|E|)` `G`
/// 使 /// 使
/// - Returns: /// - Returns:
@discardableResult public mutating func walk() -> ([Node], Bool) { @discardableResult public mutating func walk() -> (walkedNode: [Node], succeeded: Bool) {
var result = [Node]() var result = [Node]()
defer { defer {
walkedNodes = result walkedNodes = result
updateCursorJumpingTables(walkedNodes) updateCursorJumpingTables()
} }
guard !spans.isEmpty else { return (result, true) } guard !spans.isEmpty else { return (result, true) }
var vertexSpans = [VertexSpan]() var vertexSpans = [[Vertex]]()
for _ in spans { for _ in spans {
vertexSpans.append(.init()) vertexSpans.append(.init())
} }
for (i, span) in spans.enumerated() { for (i, span) in spans.enumerated() {
for j in 1...span.maxLength { for j in 1...span.maxLength {
if let p = span.nodeOf(length: j) { if let theNode = span.nodeOf(length: j) {
vertexSpans[i].append(.init(node: p)) vertexSpans[i].append(.init(node: theNode))
} }
} }
} }
@ -60,15 +62,15 @@ extension Megrez.Compositor {
} }
var walked = [Node]() var walked = [Node]()
var totalKeyLength = 0 var totalLengthOfKeys = 0
var it = terminal var iterated = terminal
while let itPrev = it.prev { while let itPrev = iterated.prev {
walked.append(itPrev.node) walked.append(itPrev.node)
it = itPrev iterated = itPrev
totalKeyLength += it.node.spanLength totalLengthOfKeys += iterated.node.spanLength
} }
guard totalKeyLength == keys.count else { guard totalLengthOfKeys == keys.count else {
print("!!! ERROR A") print("!!! ERROR A")
return (result, false) return (result, false)
} }
@ -82,26 +84,3 @@ extension Megrez.Compositor {
return (result, true) return (result, true)
} }
} }
// MARK: - Stable Sort Extension
// Reference: https://stackoverflow.com/a/50545761/4162914
extension Sequence {
  /// Returns the elements sorted with a stable sort: elements that the
  /// predicate considers equivalent keep their original relative order.
  ///
  /// - Parameter areInIncreasingOrder: Returns `true` when its first argument
  ///   must be ordered before its second one.
  /// - Returns: The sorted elements.
  fileprivate func stableSorted(
    by areInIncreasingOrder: (Element, Element) throws -> Bool
  ) rethrows -> [Element] {
    let indexed = Array(enumerated())
    let ordered = try indexed.sorted { lhs, rhs -> Bool in
      if try areInIncreasingOrder(lhs.element, rhs.element) { return true }
      // Ties are broken by original position: only an earlier element may
      // precede a later one, and only if the later one does not sort first.
      guard lhs.offset < rhs.offset else { return false }
      return try !areInIncreasingOrder(rhs.element, lhs.element)
    }
    return ordered.map { $0.element }
  }
}

View File

@ -1,13 +1,14 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation import Foundation
extension Megrez.Compositor { extension Megrez.Compositor {
///
public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible { public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
/// ///
public var keyArray: [String] public var keyArray: [String]
/// ///
public var value: String public var value: String
@ -20,7 +21,7 @@ extension Megrez.Compositor {
/// ///
/// - Parameters: /// - Parameters:
/// - key: /// - keyArray:
/// - value: /// - value:
public init(keyArray: [String], value: String = "N/A") { public init(keyArray: [String], value: String = "N/A") {
self.keyArray = keyArray.isEmpty ? ["N/A"] : keyArray self.keyArray = keyArray.isEmpty ? ["N/A"] : keyArray
@ -29,13 +30,15 @@ extension Megrez.Compositor {
/// ///
/// - Parameters: /// - Parameters:
/// - key: /// - key:
/// - value: /// - value:
public init(key: String = "N/A", value: String = "N/A") { public init(key: String = "N/A", value: String = "N/A") {
keyArray = key.isEmpty ? ["N/A"] : key.components(separatedBy: Megrez.Compositor.theSeparator) keyArray = key.isEmpty ? ["N/A"] : key.components(separatedBy: Megrez.Compositor.theSeparator)
self.value = value.isEmpty ? "N/A" : value self.value = value.isEmpty ? "N/A" : value
} }
///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) { public func hash(into hasher: inout Hasher) {
hasher.combine(keyArray) hasher.combine(keyArray)
hasher.combine(value) hasher.combine(value)
@ -50,26 +53,30 @@ extension Megrez.Compositor {
} }
// All four operators compare the same (key count, value) tuple so that they
// stay mutually consistent: pairs are ordered by the number of keys first and
// by the value string second. Standard-library tuple comparison is lexicographic.

/// Returns `true` when `lhs` has fewer keys than `rhs`, or the same number of
/// keys and a smaller value.
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  (lhs.keyArray.count, lhs.value) < (rhs.keyArray.count, rhs.value)
}

/// Returns `true` when `lhs` has more keys than `rhs`, or the same number of
/// keys and a greater value.
public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  (lhs.keyArray.count, lhs.value) > (rhs.keyArray.count, rhs.value)
}

/// Returns `true` when `lhs` has fewer keys than `rhs`, or the same number of
/// keys and a value that is smaller or equal.
///
/// The earlier form tested `count <= count` first, which made every pair with
/// an equal key count compare as `<=` in both directions regardless of value.
public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  (lhs.keyArray.count, lhs.value) <= (rhs.keyArray.count, rhs.value)
}

/// Returns `true` when `lhs` has more keys than `rhs`, or the same number of
/// keys and a value that is greater or equal.
///
/// The earlier form tested `count >= count` first, with the mirrored defect
/// described on `<=`.
public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  (lhs.keyArray.count, lhs.value) >= (rhs.keyArray.count, rhs.value)
}
} }
///
/// - all: 穿
/// - beginAt:
/// - endAt
public enum CandidateFetchFilter { case all, beginAt, endAt } public enum CandidateFetchFilter { case all, beginAt, endAt }
/// ///
@ -82,12 +89,12 @@ extension Megrez.Compositor {
guard !keys.isEmpty else { return result } guard !keys.isEmpty else { return result }
let location = max(min(location, keys.count - 1), 0) // let location = max(min(location, keys.count - 1), 0) //
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted { let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
// //
$0.spanLength > $1.spanLength $0.spanLength > $1.spanLength
} }
let keyAtCursor = keys[location] let keyAtCursor = keys[location]
for theNode in anchors.map(\.node) { for theNode in anchors.map(\.node) {
if theNode.keyArray.joined(separator: separator).isEmpty { continue } if theNode.keyArray.isEmpty { continue }
for gram in theNode.unigrams { for gram in theNode.unigrams {
switch filter { switch filter {
case .all: case .all:
@ -106,9 +113,9 @@ extension Megrez.Compositor {
/// 使 /// 使
/// ///
/// ///
/// - Parameters: /// - Parameters:
/// - candidate: /// - candidate:
/// - location: /// - location:
/// - overrideType: /// - overrideType:
/// - Returns: /// - Returns:
@ -139,7 +146,7 @@ extension Megrez.Compositor {
/// 使 /// 使
/// - Parameters: /// - Parameters:
/// - key: /// - keyArray:
/// - location: /// - location:
/// - value: /// - value:
/// - type: /// - type:
@ -151,15 +158,10 @@ extension Megrez.Compositor {
var arrOverlappedNodes: [NodeAnchor] = fetchOverlappingNodes(at: min(keys.count - 1, location)) var arrOverlappedNodes: [NodeAnchor] = fetchOverlappingNodes(at: min(keys.count - 1, location))
var overridden: NodeAnchor? var overridden: NodeAnchor?
for anchor in arrOverlappedNodes { for anchor in arrOverlappedNodes {
if let keyArray = keyArray, if keyArray != nil, anchor.node.keyArray != keyArray { continue }
anchor.node.keyArray.joined(separator: separator) != keyArray.joined(separator: separator) if !anchor.node.selectOverrideUnigram(value: value, type: type) { continue }
{ overridden = anchor
continue break
}
if anchor.node.selectOverrideUnigram(value: value, type: type) {
overridden = anchor
break
}
} }
guard let overridden = overridden else { return false } // guard let overridden = overridden else { return false } //
@ -171,8 +173,8 @@ extension Megrez.Compositor {
arrOverlappedNodes = fetchOverlappingNodes(at: i) arrOverlappedNodes = fetchOverlappingNodes(at: i)
for anchor in arrOverlappedNodes { for anchor in arrOverlappedNodes {
if anchor.node == overridden.node { continue } if anchor.node == overridden.node { continue }
if !overridden.node.keyArray.joined(separator: separator).contains( if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t"))
anchor.node.keyArray.joined(separator: separator)) || !overridden.node.value.contains(anchor.node.value) || !overridden.node.value.contains(anchor.node.value)
{ {
anchor.node.reset() anchor.node.reset()
continue continue

View File

@ -1,18 +1,28 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor { extension Megrez.Compositor {
/// ///
public class Span { public class SpanUnit {
/// nil
public var nodes: [Node?] = [] public var nodes: [Node?] = []
///
///
public private(set) var maxLength = 0 public private(set) var maxLength = 0
/// Megrez.Compositor.maxSpanLength
private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }
///
private var allowedLengths: ClosedRange<Int> { 1...maxSpanLength }
///
public init() { public init() {
clear() clear()
} }
/// 0
public func clear() { public func clear() {
nodes.removeAll() nodes.removeAll()
for _ in 0..<maxSpanLength { for _ in 0..<maxSpanLength {
@ -25,7 +35,7 @@ extension Megrez.Compositor {
/// - Parameter node: /// - Parameter node:
/// - Returns: /// - Returns:
@discardableResult public func append(node: Node) -> Bool { @discardableResult public func append(node: Node) -> Bool {
guard (1...maxSpanLength).contains(node.spanLength) else { guard allowedLengths.contains(node.spanLength) else {
return false return false
} }
nodes[node.spanLength - 1] = node nodes[node.spanLength - 1] = node
@ -37,7 +47,7 @@ extension Megrez.Compositor {
/// - Parameter length: /// - Parameter length:
/// - Returns: /// - Returns:
@discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool {
guard (1...maxSpanLength).contains(length) else { guard allowedLengths.contains(length) else {
return false return false
} }
for i in length...maxSpanLength { for i in length...maxSpanLength {
@ -47,16 +57,18 @@ extension Megrez.Compositor {
guard length > 1 else { return false } guard length > 1 else { return false }
let maxR = length - 2 let maxR = length - 2
for i in 0...maxR { for i in 0...maxR {
if nodes[maxR - i] != nil { if nodes[maxR - i] == nil { continue }
maxLength = maxR - i + 1 maxLength = maxR - i + 1
break break
}
} }
return true return true
} }
///
/// - Parameter length:
/// - Returns:
/// Returns the node stored for the given span length, if any.
/// - Parameter length: Span length of the wanted node, counted from 1.
/// - Returns: The node of that length, or `nil` when the length is outside
///   the allowed range or no node is stored for it.
public func nodeOf(length: Int) -> Node? {
  let slot = length - 1
  // `nodes` is publicly mutable and may hold fewer slots than `maxSpanLength`
  // (for example after elements were removed from it), so the slot index is
  // checked against the array itself as well.
  guard allowedLengths.contains(length), nodes.indices.contains(slot) else { return nil }
  return nodes[slot]
}
} }

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
@ -19,7 +19,13 @@ extension Megrez.Compositor {
public var distance = -(Double.infinity) public var distance = -(Double.infinity)
/// ///
public var topologicallySorted = false public var topologicallySorted = false
///
public var node: Node public var node: Node
///
///
///
/// - Parameter node:
public init(node: Node) { public init(node: Node) {
self.node = node self.node = node
} }
@ -32,19 +38,16 @@ extension Megrez.Compositor {
/// - u: v /// - u: v
/// - v: /// - v:
/// Relaxes the edge going from `u` to `v`.
///
/// The edge weight is the score of the node carried by `v`. When reaching `v`
/// through `u` gives a larger distance than the one recorded so far, `v`
/// adopts that distance and remembers `u` as its predecessor.
/// - Parameters:
///   - u: The vertex the edge starts from.
///   - v: The vertex the edge leads to; updated in place.
func relax(u: Vertex, v: inout Vertex) {
  // Distance of `v` when it is reached by way of `u`.
  let distanceViaU = u.distance + v.node.score
  // Keep the recorded distance when it is already at least as large.
  if v.distance >= distanceViaU { return }
  v.prev = u
  v.distance = distanceViaU
}
typealias VertexSpan = [Vertex]
/// topological /// topological
/// sort /// sort
/// ///
@ -61,13 +64,13 @@ extension Megrez.Compositor {
/// } /// }
/// } /// }
/// ``` /// ```
/// Cormen 2001 Introduction to Algorithms /// Cormen 2001 Introduction to Algorithms
/// - Parameter root: /// - Parameter root:
/// - Returns: /// - Returns:
func topologicalSort(root: Vertex) -> [Vertex] { func topologicalSort(root: Vertex) -> [Vertex] {
class State { class State {
var iterIndex: Int var iterIndex: Int
var vertex: Vertex let vertex: Vertex
init(vertex: Vertex, iterIndex: Int = 0) { init(vertex: Vertex, iterIndex: Int = 0) {
self.vertex = vertex self.vertex = vertex
self.iterIndex = iterIndex self.iterIndex = iterIndex

View File

@ -1,14 +1,16 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor { extension Megrez.Compositor {
///
///
/// ///
/// ///
/// ///
/// ///
/// 2 /// 2
public class Node: Equatable, Hashable { public class Node: Equatable, Hashable {
/// ///
/// - withNoOverrides: /// - withNoOverrides:
@ -17,7 +19,7 @@ extension Megrez.Compositor {
/// [("a", -114), ("b", -514), ("c", -1919)] /// [("a", -114), ("b", -514), ("c", -1919)]
/// ("c", -114)使 /// ("c", -114)使
/// ///
/// overridingScore /// overridingScore
/// - withHighScore: overridingScore使 /// - withHighScore: overridingScore使
public enum OverrideType: Int { public enum OverrideType: Int {
case withNoOverrides = 0 case withNoOverrides = 0
@ -36,84 +38,109 @@ extension Megrez.Compositor {
// public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) } // public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }
///
public private(set) var keyArray: [String] public private(set) var keyArray: [String]
///
public private(set) var spanLength: Int public private(set) var spanLength: Int
///
public private(set) var unigrams: [Megrez.Unigram] public private(set) var unigrams: [Megrez.Unigram]
///
public private(set) var currentOverrideType: Node.OverrideType
///
public private(set) var currentUnigramIndex: Int = 0 { public private(set) var currentUnigramIndex: Int = 0 {
didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) } didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) }
} }
///
public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) } public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) }
///
/// - Parameter hasher:
/// Feeds the node's identity into the given hasher.
///
/// Only the properties compared by `==` are hashed (key array, span length,
/// unigrams and override type), so that two nodes which compare equal always
/// produce the same hash, as `Hashable` requires. The previously hashed
/// `currentUnigramIndex` is not part of `==`, and `spanLength` was combined twice.
/// - Parameter hasher: The hasher to feed.
public func hash(into hasher: inout Hasher) {
  hasher.combine(keyArray)
  hasher.combine(spanLength)
  hasher.combine(unigrams)
  hasher.combine(currentOverrideType)
}
///
/// currentUnigramIndex 0
/// - Parameter source:
public func resetUnigrams(using source: [Megrez.Unigram]) {
let oldCurrentValue = unigrams[currentUnigramIndex].value
unigrams = source
// if unigrams.isEmpty { unigrams.append(.init(value: key, score: -114.514)) } //
currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0)
let newCurrentValue = unigrams[currentUnigramIndex].value
if oldCurrentValue != newCurrentValue { currentUnigramIndex = 0 }
}
public private(set) var overrideType: Node.OverrideType
/// Two nodes are equal when their key arrays, span lengths, unigrams and
/// current override types all match.
public static func == (lhs: Node, rhs: Node) -> Bool {
  guard lhs.keyArray == rhs.keyArray, lhs.spanLength == rhs.spanLength else { return false }
  guard lhs.unigrams == rhs.unigrams else { return false }
  return lhs.currentOverrideType == rhs.currentOverrideType
}
///
///
///
///
///
///
/// 2
/// - Parameters:
/// - keyArray:
/// - spanLength:
/// - unigrams:
public init(keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = []) { public init(keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = []) {
self.keyArray = keyArray self.keyArray = keyArray
self.spanLength = spanLength self.spanLength = max(spanLength, 0)
self.unigrams = unigrams self.unigrams = unigrams
overrideType = .withNoOverrides currentOverrideType = .withNoOverrides
} }
/// ///
public var isReadingMismatched: Bool { public var isReadingMismatched: Bool { keyArray.count != value.count }
keyArray.count != value.count ///
} public var isOverridden: Bool { currentOverrideType != .withNoOverrides }
/// ///
public var currentUnigram: Megrez.Unigram { public var currentUnigram: Megrez.Unigram {
unigrams.isEmpty ? .init() : unigrams[currentUnigramIndex] unigrams.isEmpty ? .init() : unigrams[currentUnigramIndex]
} }
///
public var value: String { currentUnigram.value } public var value: String { currentUnigram.value }
///
public var score: Double { public var score: Double {
guard !unigrams.isEmpty else { return 0 } guard !unigrams.isEmpty else { return 0 }
switch overrideType { switch currentOverrideType {
case .withHighScore: return overridingScore case .withHighScore: return overridingScore
case .withTopUnigramScore: return unigrams[0].score case .withTopUnigramScore: return unigrams[0].score
default: return currentUnigram.score default: return currentUnigram.score
} }
} }
public var isOverriden: Bool { ///
overrideType != .withNoOverrides
}
public func reset() { public func reset() {
currentUnigramIndex = 0 currentUnigramIndex = 0
overrideType = .withNoOverrides currentOverrideType = .withNoOverrides
} }
///
/// - Parameter separator: Compositor.theSeparator
/// - Returns:
public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String { public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String {
keyArray.joined(separator: separator) keyArray.joined(separator: separator)
} }
///
/// currentUnigramIndex 0
/// - Parameter source:
/// Replaces the node's unigrams with the given ones.
///
/// If the currently selected value is no longer the same after the
/// replacement, the node is reset (selection back to index 0, override cleared).
/// - Parameter source: The unigrams to adopt.
public func syncingUnigrams(from source: [Megrez.Unigram]) {
  // `value` reads through `currentUnigram`, which falls back to an empty
  // unigram when the array is empty; indexing `unigrams` directly would trap
  // for a node without unigrams or for an empty `source`.
  let oldCurrentValue = value
  unigrams = source
  // Re-clamp the selection, since the new array may be shorter than the old one.
  currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0)
  let newCurrentValue = value
  if oldCurrentValue != newCurrentValue { reset() }
}
///
/// - Parameters:
/// - value:
/// - type:
/// - Returns:
public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool { public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool {
guard type != .withNoOverrides else { guard type != .withNoOverrides else {
return false return false
@ -121,7 +148,7 @@ extension Megrez.Compositor {
for (i, gram) in unigrams.enumerated() { for (i, gram) in unigrams.enumerated() {
if value != gram.value { continue } if value != gram.value { continue }
currentUnigramIndex = i currentUnigramIndex = i
overrideType = type currentOverrideType = type
return true return true
} }
return false return false
@ -130,18 +157,23 @@ extension Megrez.Compositor {
} }
extension Megrez.Compositor { extension Megrez.Compositor {
/// /// Gramambular 2 NodeInSpan
///
/// Gramambular NodeInSpan
public struct NodeAnchor: Hashable { public struct NodeAnchor: Hashable {
///
let node: Megrez.Compositor.Node let node: Megrez.Compositor.Node
let spanIndex: Int // ///
let spanIndex: Int
///
var spanLength: Int { node.spanLength } var spanLength: Int { node.spanLength }
///
var unigrams: [Megrez.Unigram] { node.unigrams } var unigrams: [Megrez.Unigram] { node.unigrams }
///
var keyArray: [String] { node.keyArray } var keyArray: [String] { node.keyArray }
///
var value: String { node.value } var value: String { node.value }
/// ///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) { public func hash(into hasher: inout Hasher) {
hasher.combine(node) hasher.combine(node)
hasher.combine(spanIndex) hasher.combine(spanIndex)
@ -152,7 +184,7 @@ extension Megrez.Compositor {
// MARK: - Array Extensions. // MARK: - Array Extensions.
extension Array where Element == Megrez.Compositor.Node { extension Array where Element == Megrez.Compositor.Node {
/// ///
public var values: [String] { map(\.value) } public var values: [String] { map(\.value) }
/// ///
@ -163,7 +195,7 @@ extension Array where Element == Megrez.Compositor.Node {
/// ///
public var keyArrays: [[String]] { map(\.keyArray) } public var keyArrays: [[String]] { map(\.keyArray) }
/// (Result A, Result B) /// (Result A, Result B)
/// Result A Result B /// Result A Result B
public var nodeBorderPointDictPair: ([Int: Int], [Int: Int]) { public var nodeBorderPointDictPair: ([Int: Int], [Int: Int]) {
// Result A Result B // Result A Result B
@ -182,7 +214,7 @@ extension Array where Element == Megrez.Compositor.Node {
return (resultA, resultB) return (resultA, resultB)
} }
/// ///
public var totalKeyCount: Int { map(\.keyArray.count).reduce(0, +) } public var totalKeyCount: Int { map(\.keyArray.count).reduce(0, +) }
/// ///
@ -194,9 +226,10 @@ extension Array where Element == Megrez.Compositor.Node {
if cursor >= totalKeyCount { return nilReturn } // if cursor >= totalKeyCount { return nilReturn } //
let cursor = Swift.max(0, cursor) // let cursor = Swift.max(0, cursor) //
nilReturn = cursor..<cursor nilReturn = cursor..<cursor
guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nilReturn } // nilReturn // nilReturn
guard let rearIndex = nodeBorderPointDictPair.0[rearNodeID] else { return nilReturn } // nilReturn guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nilReturn }
guard let frontIndex = nodeBorderPointDictPair.0[rearNodeID + 1] else { return nilReturn } // nilReturn guard let rearIndex = nodeBorderPointDictPair.0[rearNodeID] else { return nilReturn }
guard let frontIndex = nodeBorderPointDictPair.0[rearNodeID + 1] else { return nilReturn }
return rearIndex..<frontIndex return rearIndex..<frontIndex
} }
@ -207,7 +240,7 @@ extension Array where Element == Megrez.Compositor.Node {
/// - Returns: /// - Returns:
public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? { public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
guard !isEmpty else { return nil } guard !isEmpty else { return nil }
let cursor = Swift.min(Swift.max(0, cursor), totalKeyCount - 1) // let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) //
let range = contextRange(ofGivenCursor: cursor) let range = contextRange(ofGivenCursor: cursor)
outCursorPastNode = range.upperBound outCursorPastNode = range.upperBound
guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nil } guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nil }

View File

@ -1,13 +1,13 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
/// ///
public protocol LangModelProtocol { public protocol LangModelProtocol {
/// ///
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] func unigramsFor(keyArray: [String]) -> [Megrez.Unigram]
/// ///
func hasUnigramsFor(keyArray: [String]) -> Bool func hasUnigramsFor(keyArray: [String]) -> Bool
} }

View File

@ -1,12 +1,12 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez { extension Megrez {
/// ///
@frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable { @frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable {
/// ///
public var value: String public var value: String
/// ///
public var score: Double public var score: Double
@ -15,15 +15,17 @@ extension Megrez {
"(" + value.description + "," + String(score) + ")" "(" + value.description + "," + String(score) + ")"
} }
/// ///
/// - Parameters: /// - Parameters:
/// - value: /// - value:
/// - score: /// - score:
public init(value: String = "", score: Double = 0) { public init(value: String = "", score: Double = 0) {
self.value = value self.value = value
self.score = score self.score = score
} }
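/// Feeds this unigram's `value` and `score` into the given hasher.
/// - Parameter hasher: The hasher to combine the components into.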
public func hash(into hasher: inout Hasher) { public func hash(into hasher: inout Hasher) {
hasher.combine(value) hasher.combine(value)
hasher.combine(score) hasher.combine(score)

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License). // Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License). // Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ==================== // ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT) // This code is released under the MIT license (SPDX-License-Identifier: MIT)
@ -9,9 +9,9 @@ import XCTest
@testable import Megrez @testable import Megrez
final class MegrezTests: XCTestCase { final class MegrezTests: XCTestCase {
func testSpan() throws { func test01_Span() throws {
let langModel = SimpleLM(input: strSampleData) let langModel = SimpleLM(input: strSampleData)
let span = Megrez.Compositor.Span() let span = Megrez.Compositor.SpanUnit()
let n1 = Megrez.Compositor.Node( let n1 = Megrez.Compositor.Node(
keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"]) keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"])
) )
@ -50,11 +50,11 @@ final class MegrezTests: XCTestCase {
XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1)) XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1))
} }
func testRankedLangModel() throws { func test02_RankedLangModel() throws {
class TestLM: LangModelProtocol { class TestLM: LangModelProtocol {
func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray == ["foo"] } func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray.joined() == "foo" }
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] { func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
keyArray == ["foo"] keyArray.joined() == "foo"
? [.init(value: "middle", score: -5), .init(value: "highest", score: -2), .init(value: "lowest", score: -10)] ? [.init(value: "middle", score: -5), .init(value: "highest", score: -2), .init(value: "lowest", score: -10)]
: .init() : .init()
} }
@ -74,7 +74,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(unigrams[2].score, -10) XCTAssertEqual(unigrams[2].score, -10)
} }
func testCompositor_BasicTests() throws { func test03_Compositor_BasicTests() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
XCTAssertEqual(compositor.separator, Megrez.Compositor.theSeparator) XCTAssertEqual(compositor.separator, Megrez.Compositor.theSeparator)
XCTAssertEqual(compositor.cursor, 0) XCTAssertEqual(compositor.cursor, 0)
@ -93,11 +93,11 @@ final class MegrezTests: XCTestCase {
compositor.dropKey(direction: .rear) compositor.dropKey(direction: .rear)
XCTAssertEqual(compositor.cursor, 0) XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.cursor, 0) XCTAssertEqual(compositor.length, 0)
XCTAssertEqual(compositor.spans.count, 0) XCTAssertEqual(compositor.spans.count, 0)
} }
func testCompositor_InvalidOperations() throws { func test04_Compositor_InvalidOperations() throws {
class TestLM: LangModelProtocol { class TestLM: LangModelProtocol {
func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray == ["foo"] } func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray == ["foo"] }
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] { func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
@ -122,7 +122,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 0) XCTAssertEqual(compositor.length, 0)
} }
func testCompositor_DeleteToTheFrontOfCursor() throws { func test05_Compositor_DeleteToTheFrontOfCursor() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.insertKey("a") compositor.insertKey("a")
compositor.cursor = 0 compositor.cursor = 0
@ -132,13 +132,14 @@ final class MegrezTests: XCTestCase {
XCTAssertFalse(compositor.dropKey(direction: .rear)) XCTAssertFalse(compositor.dropKey(direction: .rear))
XCTAssertEqual(compositor.cursor, 0) XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.length, 1) XCTAssertEqual(compositor.length, 1)
XCTAssertEqual(compositor.spans.count, 1)
XCTAssertTrue(compositor.dropKey(direction: .front)) XCTAssertTrue(compositor.dropKey(direction: .front))
XCTAssertEqual(compositor.cursor, 0) XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.length, 0) XCTAssertEqual(compositor.length, 0)
XCTAssertEqual(compositor.spans.count, 0) XCTAssertEqual(compositor.spans.count, 0)
} }
func testCompositor_MultipleSpans() throws { func test06_Compositor_MultipleSpans() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";" compositor.separator = ";"
compositor.insertKey("a") compositor.insertKey("a")
@ -158,7 +159,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
} }
func testCompositor_SpanDeletionFromFront() throws { func test07_Compositor_SpanDeletionFromFront() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";" compositor.separator = ";"
compositor.insertKey("a") compositor.insertKey("a")
@ -176,7 +177,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b") XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
} }
func testCompositor_SpanDeletionFromMiddle() throws { func test08_Compositor_SpanDeletionFromMiddle() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";" compositor.separator = ";"
compositor.insertKey("a") compositor.insertKey("a")
@ -211,7 +212,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
} }
func testCompositor_SpanDeletionFromRear() throws { func test09_Compositor_SpanDeletionFromRear() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";" compositor.separator = ";"
compositor.insertKey("a") compositor.insertKey("a")
@ -231,7 +232,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
} }
func testCompositor_SpanInsertion() throws { func test10_Compositor_SpanInsertion() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";" compositor.separator = ";"
compositor.insertKey("a") compositor.insertKey("a")
@ -259,7 +260,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c") XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
} }
func testCompositor_LongGridDeletion() throws { func test11_Compositor_LongGridDeletion() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = "" compositor.separator = ""
compositor.insertKey("a") compositor.insertKey("a")
@ -294,7 +295,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn") XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn")
} }
func testCompositor_LongGridInsertion() throws { func test12_Compositor_LongGridInsertion() throws {
var compositor = Megrez.Compositor(with: MockLM()) var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = "" compositor.separator = ""
compositor.insertKey("a") compositor.insertKey("a")
@ -331,7 +332,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm") XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm")
} }
func testCompositor_StressBench() throws { func test13_Compositor_StressBench() throws {
NSLog("// Stress test preparation begins.") NSLog("// Stress test preparation begins.")
var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData))
for _ in 0..<1919 { for _ in 0..<1919 {
@ -344,7 +345,7 @@ final class MegrezTests: XCTestCase {
NSLog("// Stress test elapsed: \(timeElapsed)s.") NSLog("// Stress test elapsed: \(timeElapsed)s.")
} }
func testCompositor_WordSegmentation() throws { func test14_Compositor_WordSegmentation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true)) var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true))
compositor.separator = "" compositor.separator = ""
for i in "高科技公司的年終獎金" { for i in "高科技公司的年終獎金" {
@ -354,7 +355,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"]) XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"])
} }
func testCompositor_InputTestAndCursorJump() throws { func test15_Compositor_InputTestAndCursorJump() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = "" compositor.separator = ""
compositor.insertKey("gao1") compositor.insertKey("gao1")
@ -429,7 +430,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.dumpDOT, expectedDumpDOT) XCTAssertEqual(compositor.dumpDOT, expectedDumpDOT)
} }
func testCompositor_InputTest2() throws { func test16_Compositor_InputTest2() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = "" compositor.separator = ""
compositor.insertKey("gao1") compositor.insertKey("gao1")
@ -443,7 +444,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高科技", "公司"]) XCTAssertEqual(result.values, ["高科技", "公司"])
} }
func testCompositor_OverrideOverlappingNodes() throws { func test17_Compositor_OverrideOverlappingNodes() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = "" compositor.separator = ""
compositor.insertKey("gao1") compositor.insertKey("gao1")
@ -475,7 +476,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高科技"]) XCTAssertEqual(result.values, ["高科技"])
} }
func testCompositor_OverrideReset() throws { func test18_Compositor_OverrideReset() throws {
var compositor = Megrez.Compositor( var compositor = Megrez.Compositor(
with: SimpleLM(input: strSampleData + "zhong1jiang3 終講 -11.0\n" + "jiang3jin1 槳襟 -11.0\n")) with: SimpleLM(input: strSampleData + "zhong1jiang3 終講 -11.0\n" + "jiang3jin1 槳襟 -11.0\n"))
compositor.separator = "" compositor.separator = ""
@ -499,7 +500,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["年終", "槳襟"]) XCTAssertEqual(result.values, ["年終", "槳襟"])
} }
func testCompositor_CandidateDisambiguation() throws { func test19_Compositor_CandidateDisambiguation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strEmojiSampleData)) var compositor = Megrez.Compositor(with: SimpleLM(input: strEmojiSampleData))
compositor.separator = "" compositor.separator = ""
compositor.insertKey("gao1") compositor.insertKey("gao1")
@ -521,7 +522,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高熱", "🔥", "危險"]) XCTAssertEqual(result.values, ["高熱", "🔥", "危險"])
} }
func testCompositor_updateUnigramData() throws { func test20_Compositor_updateUnigramData() throws {
let theLM = SimpleLM(input: strSampleData) let theLM = SimpleLM(input: strSampleData)
var compositor = Megrez.Compositor(with: theLM) var compositor = Megrez.Compositor(with: theLM)
compositor.separator = "" compositor.separator = ""