Megrez // 2.5.0 update, syncing changes from MegrezNT.

This commit is contained in:
ShikiSuen 2022-12-13 19:25:31 +08:00
parent 5bca4abef5
commit 258d2f7362
11 changed files with 271 additions and 218 deletions

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)

View File

@ -1,8 +1,10 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation
extension Megrez {
///
///
@ -20,9 +22,9 @@ extension Megrez {
/// Resize actions understood by the grid-resizing routine: `expand` makes it insert a new span,
/// and `shrink` is the opposite request.
public enum ResizeBehavior {
  case expand
  case shrink
}
///
public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } }
/// -
/// -
public static var theSeparator: String = "-"
///
///
public var cursor: Int = 0 {
didSet {
cursor = max(0, min(cursor, length))
@ -30,30 +32,33 @@ extension Megrez {
}
}
/// The marker position inside the compositor.
/// Every assignment is clamped into `0...length` by the `didSet` observer.
public var marker: Int = 0 { didSet { marker = max(0, min(marker, length)) } }
/// The separator string of this compositor instance; its initial value is the shared `theSeparator`.
/// Assigning a new value also writes it back to the static `theSeparator`.
public var separator = theSeparator {
didSet {
Self.theSeparator = separator
}
}
/// The number of keys currently held (`keys.count`).
public var width: Int { keys.count }
/// The nodes produced by the most recent walk.
/// NOTE(review): presumably empty until `walk()` has run at least once — confirm.
public var walkedNodes: [Node] = []
/// The number of keys currently held (`keys.count`).
///
/// - Remark: NOTE(review): the original remark mentions `spans.count`; presumably both counts
///   are kept equal by the insert/drop routines — confirm.
public var length: Int { keys.count }
/// Whether the compositor holds neither spans nor keys.
public var isEmpty: Bool { spans.isEmpty && keys.isEmpty }
/// The keys held by this compositor, in order.
/// Readable from outside; writable only from inside the type.
public private(set) var keys = [String]()
/// The spans of the grid (declared with the `Span` element type).
public private(set) var spans = [Span]()
/// The spans of the grid (declared with the `SpanUnit` element type).
public private(set) var spans = [SpanUnit]()
/// The language model (wrapped as `LangModelRanked`) used by this compositor.
public var langModel: LangModelRanked
/// The language model (wrapped as `LangModelRanked`); assigning a new one calls `clear()`.
public var langModel: LangModelRanked {
didSet { clear() }
}
/// Lookup table keyed by cursor position.
/// NOTE(review): presumably rebuilt by `updateCursorJumpingTables()` after each walk — confirm.
public private(set) var cursorRegionMap: [Int: Int] = .init()
@ -64,8 +69,13 @@ extension Megrez {
self.separator = separator
}
///
///
///
///
public mutating func clear() {
cursor = 0
marker = 0
keys.removeAll()
spans.removeAll()
walkedNodes.removeAll()
@ -107,9 +117,16 @@ extension Megrez {
}
///
///
/// RearFront
/// - Parameters:
/// - direction:
/// - isMarker:
/// - direction:
/// - isMarker:
///
///
///
/// // InputState KeyHandler
/// NSStringUtils
/// - Returns:
@discardableResult public mutating func jumpCursorBySpan(to direction: TypingDirection, isMarker: Bool = false)
-> Bool
@ -117,7 +134,7 @@ extension Megrez {
var target = isMarker ? marker : cursor
switch direction {
case .front:
if target == width { return false }
if target == length { return false }
case .rear:
if target == 0 { return false }
}
@ -152,27 +169,28 @@ extension Megrez {
/// Renders the span grid as a GraphViz DOT digraph (left-to-right rank direction),
/// using `BOS` and `EOS` as the artificial begin and end vertices.
public var dumpDOT: String {
var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
// An NSMutableString is used here the way a StringBuilder would be used in C#.
let strOutput: NSMutableString = .init(string: "digraph {\ngraph [ rankdir=LR ];\nBOS;\n")
for (p, span) in spans.enumerated() {
// Probe every candidate node length of this span; lengths without a node are skipped.
for ni in 0...(span.maxLength) {
guard let np = span.nodeOf(length: ni) else { continue }
// Nodes of the very first span are linked directly from BOS.
if p == 0 {
strOutput += "BOS -> \(np.value);\n"
strOutput.append("BOS -> \(np.value);\n")
}
// Declare the vertex itself.
strOutput += "\(np.value);\n"
strOutput.append("\(np.value);\n")
// Link this node to every node stored in the span located `ni` positions further on.
if (p + ni) < spans.count {
let destinationSpan = spans[p + ni]
for q in 0...(destinationSpan.maxLength) {
guard let dn = destinationSpan.nodeOf(length: q) else { continue }
strOutput += np.value + " -> " + dn.value + ";\n"
strOutput.append(np.value + " -> " + dn.value + ";\n")
}
}
// A node reaching exactly to the end of the grid is linked to EOS.
guard (p + ni) == spans.count else { continue }
strOutput += np.value + " -> EOS;\n"
strOutput.append(np.value + " -> EOS;\n")
}
}
strOutput += "EOS;\n}\n"
return strOutput
strOutput.append("EOS;\n}\n")
return strOutput.description
}
}
}
@ -180,9 +198,7 @@ extension Megrez {
// MARK: - Internal Methods (Maybe Public)
extension Megrez.Compositor {
// MARK: Internal methods for maintaining the grid.
///
///
/// - Parameters:
/// - location:
/// - action:
@ -190,7 +206,7 @@ extension Megrez.Compositor {
let location = max(min(location, spans.count), 0) //
switch action {
case .expand:
spans.insert(Span(), at: location)
spans.insert(SpanUnit(), at: location)
if [0, spans.count].contains(location) { return }
case .shrink:
if spans.count == location { return }
@ -241,26 +257,31 @@ extension Megrez.Compositor {
}
}
/// Stores `node` in the span at `location`, after clamping `location` into `0...(spans.count - 1)`.
/// - Parameters:
///   - node: The node to store.
///   - location: The requested span index.
/// - Returns: Always `true`.
@discardableResult mutating func insertNode(_ node: Node, at location: Int) -> Bool {
  // Pull an out-of-range location back onto the nearest span index.
  let cappedAtTop = min(location, spans.count - 1)
  let spanIndex = max(cappedAtTop, 0)
  spans[spanIndex].append(node: node)
  return true
}
/// Returns the keys covered by `range` as a standalone array.
/// - Parameter range: A half-open index range into `keys`.
/// - Returns: The keys inside `range`, or an empty array when `range` reaches outside `0...keys.count`.
func getJoinedKeyArray(range: Range<Int>) -> [String] {
  // Bounds are compared by hand, without going through any `contains` call.
  let fitsInsideKeys = range.lowerBound >= 0 && range.upperBound <= keys.count
  if !fitsInsideKeys { return [] }
  return Array(keys[range])
}
/// Looks up the node of a given span length stored in the span at `location`
/// and returns it only when its key array matches the expected one.
/// - Parameters:
///   - location: The span index to inspect; it is clamped before use.
///   - length: The span length of the wanted node.
///   - keyArray: The key array the node is expected to carry.
/// - Returns: The matching node, or `nil` when the span has no node of that length
///   or the node's key array differs from `keyArray`.
func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? {
let location = max(min(location, spans.count), 0) // Clamps into `0...spans.count`.
let location = max(min(location, spans.count - 1), 0) // Clamps into `0...(spans.count - 1)`.
guard let node = spans[location].nodeOf(length: length) else { return nil }
return keyArray == node.keyArray ? node : nil
}
///
/// - Returns: 0
/// - Parameter updateExisting:
///
/// - Returns: 0
@discardableResult public mutating func update(updateExisting: Bool = false) -> Int {
let maxSpanLength = Megrez.Compositor.maxSpanLength
let range = max(0, cursor - maxSpanLength)..<min(cursor + maxSpanLength, keys.count)
@ -276,16 +297,15 @@ extension Megrez.Compositor {
if theNode.keyArray.count == 1 { continue }
spans[position].nodes.removeAll { $0 == theNode }
} else {
theNode.resetUnigrams(using: unigrams)
theNode.syncingUnigrams(from: unigrams)
}
nodesChanged += 1
continue
}
let unigrams = langModel.unigramsFor(keyArray: joinedKeyArray)
guard !unigrams.isEmpty else { continue }
insertNode(
.init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams),
at: position
spans[position].append(
node: .init(keyArray: joinedKeyArray, spanLength: theLength, unigrams: unigrams)
)
nodesChanged += 1
}
@ -293,12 +313,13 @@ extension Megrez.Compositor {
return nodesChanged
}
mutating func updateCursorJumpingTables(_ walkedNodes: [Node]) {
///
mutating func updateCursorJumpingTables() {
var cursorRegionMapDict = [Int: Int]()
cursorRegionMapDict[-1] = 0 //
var counter = 0
for (i, anchor) in walkedNodes.enumerated() {
for _ in 0..<anchor.spanLength {
for (i, theNode) in walkedNodes.enumerated() {
for _ in 0..<theNode.spanLength {
cursorRegionMapDict[counter] = i
counter += 1
}

View File

@ -1,9 +1,11 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor {
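/// Walks the grid and stores the outcome in this compositor's `walkedNodes`.
///
/// The walk looks for the path through the grid whose accumulated node score is the largest.
/// It follows the shortest-path procedure for directed acyclic graphs described by Cormen et al. (2001),
/// except that the relaxation step keeps the larger distance instead of the smaller one.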
@ -11,23 +13,23 @@ extension Megrez.Compositor {
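/// For a graph `G = (V, E)` the procedure runs in `O(|V|+|E|)` time, where `G` is a directed acyclic graph,
/// so walking stays cheap even when the grid is large.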
/// - Returns:
@discardableResult public mutating func walk() -> ([Node], Bool) {
@discardableResult public mutating func walk() -> (walkedNode: [Node], succeeded: Bool) {
var result = [Node]()
defer {
walkedNodes = result
updateCursorJumpingTables(walkedNodes)
updateCursorJumpingTables()
}
guard !spans.isEmpty else { return (result, true) }
var vertexSpans = [VertexSpan]()
var vertexSpans = [[Vertex]]()
for _ in spans {
vertexSpans.append(.init())
}
for (i, span) in spans.enumerated() {
for j in 1...span.maxLength {
if let p = span.nodeOf(length: j) {
vertexSpans[i].append(.init(node: p))
if let theNode = span.nodeOf(length: j) {
vertexSpans[i].append(.init(node: theNode))
}
}
}
@ -60,15 +62,15 @@ extension Megrez.Compositor {
}
var walked = [Node]()
var totalKeyLength = 0
var it = terminal
while let itPrev = it.prev {
var totalLengthOfKeys = 0
var iterated = terminal
while let itPrev = iterated.prev {
walked.append(itPrev.node)
it = itPrev
totalKeyLength += it.node.spanLength
iterated = itPrev
totalLengthOfKeys += iterated.node.spanLength
}
guard totalKeyLength == keys.count else {
guard totalLengthOfKeys == keys.count else {
print("!!! ERROR A")
return (result, false)
}
@ -82,26 +84,3 @@ extension Megrez.Compositor {
return (result, true)
}
}
// MARK: - Stable Sort Extension
// Reference: https://stackoverflow.com/a/50545761/4162914
extension Sequence {
  /// Returns the elements in sorted order while keeping the original relative order
  /// of elements that compare as equal.
  ///
  /// - Parameter areInIncreasingOrder: A predicate returning `true` when its first argument
  ///   must be ordered before its second one.
  /// - Returns: The stably sorted elements.
  fileprivate func stableSorted(
    by areInIncreasingOrder: (Element, Element) throws -> Bool
  ) rethrows -> [Element] {
    // Pair each element with its original position so ties can fall back on it.
    let indexed = Array(enumerated())
    let ordered = try indexed.sorted { lhs, rhs -> Bool in
      if try areInIncreasingOrder(lhs.element, rhs.element) { return true }
      guard lhs.offset < rhs.offset else { return false }
      // Earlier element wins only when the later one is not strictly smaller.
      return try !areInIncreasingOrder(rhs.element, lhs.element)
    }
    return ordered.map { $0.element }
  }
}

View File

@ -1,13 +1,14 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation
extension Megrez.Compositor {
///
public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
///
///
public var keyArray: [String]
///
public var value: String
@ -20,7 +21,7 @@ extension Megrez.Compositor {
///
/// - Parameters:
/// - key:
/// - keyArray:
/// - value:
public init(keyArray: [String], value: String = "N/A") {
self.keyArray = keyArray.isEmpty ? ["N/A"] : keyArray
@ -29,13 +30,15 @@ extension Megrez.Compositor {
/// Creates a pair from a single joined key string and a value.
/// - Parameters:
///   - key: The joined key string; it is split on `Megrez.Compositor.theSeparator`.
///     An empty string produces the key array `["N/A"]`.
///   - value: The value; an empty string is replaced with `"N/A"`.
public init(key: String = "N/A", value: String = "N/A") {
  if key.isEmpty {
    keyArray = ["N/A"]
  } else {
    keyArray = key.components(separatedBy: Megrez.Compositor.theSeparator)
  }
  if value.isEmpty {
    self.value = "N/A"
  } else {
    self.value = value
  }
}
///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) {
hasher.combine(keyArray)
hasher.combine(value)
@ -50,26 +53,30 @@ extension Megrez.Compositor {
}
/// Orders two pairs by key length first and by value second.
/// The first expression pair measures key length as the character count of the joined keys;
/// the second pair measures it as the number of elements in `keyArray`.
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.keyArray.joined().count < rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value < rhs.value)
(lhs.keyArray.count < rhs.keyArray.count)
|| (lhs.keyArray.count == rhs.keyArray.count && lhs.value < rhs.value)
}
/// Orders two pairs by key length first and by value second (descending counterpart of `<`).
/// The first expression pair measures key length as the character count of the joined keys;
/// the second pair measures it as the number of elements in `keyArray`.
public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.keyArray.joined().count > rhs.keyArray.joined().count)
|| (lhs.keyArray.joined().count == rhs.keyArray.joined().count && lhs.value > rhs.value)
(lhs.keyArray.count > rhs.keyArray.count)
|| (lhs.keyArray.count == rhs.keyArray.count && lhs.value > rhs.value)
}
/// Returns whether `lhs` is ordered before or at the same rank as `rhs`:
/// fewer keys win outright, and pairs with the same number of keys are ranked by value.
public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  // The key-count comparison has to be strict. With `<=` here, equal key counts were accepted
  // before the value was ever looked at, so `a <= b` and `a > b` could both be true.
  let lhsCount = lhs.keyArray.count
  let rhsCount = rhs.keyArray.count
  return lhsCount < rhsCount || (lhsCount == rhsCount && lhs.value <= rhs.value)
}
/// Returns whether `lhs` is ordered after or at the same rank as `rhs`:
/// more keys win outright, and pairs with the same number of keys are ranked by value.
public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  // The key-count comparison has to be strict. With `>=` here, equal key counts were accepted
  // before the value was ever looked at, so `a >= b` and `a < b` could both be true.
  let lhsCount = lhs.keyArray.count
  let rhsCount = rhs.keyArray.count
  return lhsCount > rhsCount || (lhsCount == rhsCount && lhs.value >= rhs.value)
}
}
/// Filter modes accepted when fetching candidates.
/// NOTE(review): presumably `all` keeps every node overlapping the position, `beginAt` keeps nodes
/// starting there and `endAt` keeps nodes ending there — confirm against the fetch routine.
public enum CandidateFetchFilter {
  case all
  case beginAt
  case endAt
}
///
@ -82,12 +89,12 @@ extension Megrez.Compositor {
guard !keys.isEmpty else { return result }
let location = max(min(location, keys.count - 1), 0) //
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
//
//
$0.spanLength > $1.spanLength
}
let keyAtCursor = keys[location]
for theNode in anchors.map(\.node) {
if theNode.keyArray.joined(separator: separator).isEmpty { continue }
if theNode.keyArray.isEmpty { continue }
for gram in theNode.unigrams {
switch filter {
case .all:
@ -106,9 +113,9 @@ extension Megrez.Compositor {
/// 使
///
///
///
/// - Parameters:
/// - candidate:
/// - candidate:
/// - location:
/// - overrideType:
/// - Returns:
@ -139,7 +146,7 @@ extension Megrez.Compositor {
/// 使
/// - Parameters:
/// - key:
/// - keyArray:
/// - location:
/// - value:
/// - type:
@ -151,16 +158,11 @@ extension Megrez.Compositor {
var arrOverlappedNodes: [NodeAnchor] = fetchOverlappingNodes(at: min(keys.count - 1, location))
var overridden: NodeAnchor?
for anchor in arrOverlappedNodes {
if let keyArray = keyArray,
anchor.node.keyArray.joined(separator: separator) != keyArray.joined(separator: separator)
{
continue
}
if anchor.node.selectOverrideUnigram(value: value, type: type) {
if keyArray != nil, anchor.node.keyArray != keyArray { continue }
if !anchor.node.selectOverrideUnigram(value: value, type: type) { continue }
overridden = anchor
break
}
}
guard let overridden = overridden else { return false } //
@ -171,8 +173,8 @@ extension Megrez.Compositor {
arrOverlappedNodes = fetchOverlappingNodes(at: i)
for anchor in arrOverlappedNodes {
if anchor.node == overridden.node { continue }
if !overridden.node.keyArray.joined(separator: separator).contains(
anchor.node.keyArray.joined(separator: separator)) || !overridden.node.value.contains(anchor.node.value)
if !overridden.node.joinedKey(by: "\t").contains(anchor.node.joinedKey(by: "\t"))
|| !overridden.node.value.contains(anchor.node.value)
{
anchor.node.reset()
continue

View File

@ -1,18 +1,28 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor {
///
public class Span {
///
public class SpanUnit {
/// nil
public var nodes: [Node?] = []
///
///
public private(set) var maxLength = 0
/// Megrez.Compositor.maxSpanLength
private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }
///
private var allowedLengths: ClosedRange<Int> { 1...maxSpanLength }
/// Creates a span unit and immediately runs `clear()` to set up its node storage.
public init() {
clear()
}
/// 0
public func clear() {
nodes.removeAll()
for _ in 0..<maxSpanLength {
@ -25,7 +35,7 @@ extension Megrez.Compositor {
/// - Parameter node:
/// - Returns:
@discardableResult public func append(node: Node) -> Bool {
guard (1...maxSpanLength).contains(node.spanLength) else {
guard allowedLengths.contains(node.spanLength) else {
return false
}
nodes[node.spanLength - 1] = node
@ -37,7 +47,7 @@ extension Megrez.Compositor {
/// - Parameter length:
/// - Returns:
@discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool {
guard (1...maxSpanLength).contains(length) else {
guard allowedLengths.contains(length) else {
return false
}
for i in length...maxSpanLength {
@ -47,16 +57,18 @@ extension Megrez.Compositor {
guard length > 1 else { return false }
let maxR = length - 2
for i in 0...maxR {
if nodes[maxR - i] != nil {
if nodes[maxR - i] == nil { continue }
maxLength = maxR - i + 1
break
}
}
return true
}
///
/// - Parameter length:
/// - Returns:
public func nodeOf(length: Int) -> Node? {
guard (1...maxSpanLength).contains(length) else { return nil }
guard allowedLengths.contains(length) else { return nil }
return nodes[length - 1]
}
}

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
@ -19,7 +19,13 @@ extension Megrez.Compositor {
public var distance = -(Double.infinity)
///
public var topologicallySorted = false
///
public var node: Node
/// Creates a vertex wrapping the given node.
///
/// - Parameter node: The node this vertex stands for.
public init(node: Node) {
self.node = node
}
@ -32,18 +38,15 @@ extension Megrez.Compositor {
/// - u: v
/// - v:
func relax(u: Vertex, v: inout Vertex) {
// The weight of the edge from u to v is taken from v's node score.
let w: Double = v.node.score
// This relaxation maximizes: v is updated only when its current distance
// is smaller than "u's distance + w".
if v.distance < u.distance + w {
// Same condition written as an early exit: nothing to do unless the path through u
// is strictly better. When it is, record the new distance and make u the predecessor of v.
if v.distance >= u.distance + w { return }
v.distance = u.distance + w
v.prev = u
}
}
typealias VertexSpan = [Vertex]
/// topological
/// sort
@ -61,13 +64,13 @@ extension Megrez.Compositor {
/// }
/// }
/// ```
/// Cormen 2001 Introduction to Algorithms
/// Cormen 2001 Introduction to Algorithms
/// - Parameter root:
/// - Returns:
func topologicalSort(root: Vertex) -> [Vertex] {
class State {
var iterIndex: Int
var vertex: Vertex
let vertex: Vertex
init(vertex: Vertex, iterIndex: Int = 0) {
self.vertex = vertex
self.iterIndex = iterIndex

View File

@ -1,14 +1,16 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez.Compositor {
///
///
///
///
///
///
/// 2
/// 2
public class Node: Equatable, Hashable {
///
/// - withNoOverrides:
@ -17,7 +19,7 @@ extension Megrez.Compositor {
/// [("a", -114), ("b", -514), ("c", -1919)]
/// ("c", -114)使
///
/// overridingScore
/// overridingScore
/// - withHighScore: overridingScore使
public enum OverrideType: Int {
case withNoOverrides = 0
@ -36,84 +38,109 @@ extension Megrez.Compositor {
// public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }
///
public private(set) var keyArray: [String]
///
public private(set) var spanLength: Int
///
public private(set) var unigrams: [Megrez.Unigram]
///
public private(set) var currentOverrideType: Node.OverrideType
///
public private(set) var currentUnigramIndex: Int = 0 {
didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) }
}
///
public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) }
/// Feeds the node's identity-relevant state into the hasher.
///
/// Only the properties compared by `==` are hashed (key array, span length, unigrams and
/// override type). `currentUnigramIndex` is deliberately left out: `==` ignores it, and hashing
/// it would let two equal nodes produce different hashes, which breaks the `Hashable` contract.
/// - Parameter hasher: The hasher to feed.
public func hash(into hasher: inout Hasher) {
  hasher.combine(keyArray)
  hasher.combine(spanLength)
  hasher.combine(unigrams)
  hasher.combine(currentOverrideType)
}
/// Replaces this node's unigrams with `source`.
///
/// `currentUnigramIndex` is clamped to the new list and set back to 0 when the value it points at
/// is no longer the same as before the replacement.
/// - Parameter source: The new unigram list.
public func resetUnigrams(using source: [Megrez.Unigram]) {
  // `value` reads through `currentUnigram`, which tolerates an empty unigram list;
  // subscripting `unigrams` directly would trap when the old or the new list is empty.
  let oldCurrentValue = value
  unigrams = source
  currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0)
  let newCurrentValue = value
  if oldCurrentValue != newCurrentValue { currentUnigramIndex = 0 }
}
public private(set) var overrideType: Node.OverrideType
/// Two nodes are equal when their key arrays, span lengths, unigram lists and override types match.
/// The currently selected unigram index is not part of the comparison.
public static func == (lhs: Node, rhs: Node) -> Bool {
lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength
&& lhs.unigrams == rhs.unigrams && lhs.overrideType == rhs.overrideType
&& lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType
}
/// Creates a node from its key array, its span length and its unigrams.
///
/// A freshly created node starts with no override applied (`.withNoOverrides`).
/// - Parameters:
///   - keyArray: The keys this node covers.
///   - spanLength: The span length of this node; the second assignment below floors it at 0.
///   - unigrams: The unigrams available for this node.
public init(keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = []) {
self.keyArray = keyArray
self.spanLength = spanLength
self.spanLength = max(spanLength, 0)
self.unigrams = unigrams
overrideType = .withNoOverrides
currentOverrideType = .withNoOverrides
}
///
public var isReadingMismatched: Bool {
keyArray.count != value.count
}
public var isReadingMismatched: Bool { keyArray.count != value.count }
/// Whether an override is in effect, i.e. `currentOverrideType` is anything but `.withNoOverrides`.
public var isOverridden: Bool { currentOverrideType != .withNoOverrides }
/// The unigram selected by `currentUnigramIndex`.
/// Falls back to a default-initialized unigram when this node has no unigrams.
public var currentUnigram: Megrez.Unigram {
unigrams.isEmpty ? .init() : unigrams[currentUnigramIndex]
}
/// The value of the currently selected unigram.
public var value: String { currentUnigram.value }
/// The score of this node under the active override type.
///
/// Returns 0 when the node has no unigrams. Otherwise: `.withHighScore` yields `overridingScore`,
/// `.withTopUnigramScore` yields the score of the first unigram, and every other case yields
/// the score of the currently selected unigram.
public var score: Double {
guard !unigrams.isEmpty else { return 0 }
switch overrideType {
switch currentOverrideType {
case .withHighScore: return overridingScore
case .withTopUnigramScore: return unigrams[0].score
default: return currentUnigram.score
}
}
public var isOverriden: Bool {
overrideType != .withNoOverrides
}
/// Puts the node back into its un-overridden state: the first unigram is selected
/// and the override type becomes `.withNoOverrides`.
public func reset() {
currentUnigramIndex = 0
overrideType = .withNoOverrides
currentOverrideType = .withNoOverrides
}
/// Concatenates this node's `keyArray` into a single string.
/// - Parameter separator: The string placed between adjacent keys;
///   defaults to `Megrez.Compositor.theSeparator`.
/// - Returns: The joined key string.
public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String {
  let joined = keyArray.joined(separator: separator)
  return joined
}
/// Replaces this node's unigrams with `source`, keeping the current selection only while it
/// still points at the same value.
///
/// `currentUnigramIndex` is clamped to the new list. When the value it points at differs from
/// the one selected before the replacement, the node is `reset()`.
/// - Parameter source: The new unigram list.
public func syncingUnigrams(from source: [Megrez.Unigram]) {
  // `value` reads through `currentUnigram`, which tolerates an empty unigram list;
  // subscripting `unigrams` directly would trap when the old or the new list is empty.
  let oldCurrentValue = value
  unigrams = source
  currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0)
  let newCurrentValue = value
  if oldCurrentValue != newCurrentValue { reset() }
}
///
/// - Parameters:
/// - value:
/// - type:
/// - Returns:
public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool {
guard type != .withNoOverrides else {
return false
@ -121,7 +148,7 @@ extension Megrez.Compositor {
for (i, gram) in unigrams.enumerated() {
if value != gram.value { continue }
currentUnigramIndex = i
overrideType = type
currentOverrideType = type
return true
}
return false
@ -130,18 +157,23 @@ extension Megrez.Compositor {
}
extension Megrez.Compositor {
///
///
/// Gramambular NodeInSpan
/// Gramambular 2 NodeInSpan
public struct NodeAnchor: Hashable {
///
let node: Megrez.Compositor.Node
let spanIndex: Int //
///
let spanIndex: Int
///
var spanLength: Int { node.spanLength }
///
var unigrams: [Megrez.Unigram] { node.unigrams }
///
var keyArray: [String] { node.keyArray }
///
var value: String { node.value }
///
///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) {
hasher.combine(node)
hasher.combine(spanIndex)
@ -152,7 +184,7 @@ extension Megrez.Compositor {
// MARK: - Array Extensions.
extension Array where Element == Megrez.Compositor.Node {
///
///
public var values: [String] { map(\.value) }
///
@ -163,7 +195,7 @@ extension Array where Element == Megrez.Compositor.Node {
///
public var keyArrays: [[String]] { map(\.keyArray) }
/// (Result A, Result B)
/// (Result A, Result B)
/// Result A Result B
public var nodeBorderPointDictPair: ([Int: Int], [Int: Int]) {
// Result A Result B
@ -182,7 +214,7 @@ extension Array where Element == Megrez.Compositor.Node {
return (resultA, resultB)
}
///
///
public var totalKeyCount: Int { map(\.keyArray.count).reduce(0, +) }
///
@ -194,9 +226,10 @@ extension Array where Element == Megrez.Compositor.Node {
if cursor >= totalKeyCount { return nilReturn } //
let cursor = Swift.max(0, cursor) //
nilReturn = cursor..<cursor
guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nilReturn } // nilReturn
guard let rearIndex = nodeBorderPointDictPair.0[rearNodeID] else { return nilReturn } // nilReturn
guard let frontIndex = nodeBorderPointDictPair.0[rearNodeID + 1] else { return nilReturn } // nilReturn
// nilReturn
guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nilReturn }
guard let rearIndex = nodeBorderPointDictPair.0[rearNodeID] else { return nilReturn }
guard let frontIndex = nodeBorderPointDictPair.0[rearNodeID + 1] else { return nilReturn }
return rearIndex..<frontIndex
}
@ -207,7 +240,7 @@ extension Array where Element == Megrez.Compositor.Node {
/// - Returns:
public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
guard !isEmpty else { return nil }
let cursor = Swift.min(Swift.max(0, cursor), totalKeyCount - 1) //
let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1)) //
let range = contextRange(ofGivenCursor: cursor)
outCursorPastNode = range.upperBound
guard let rearNodeID = nodeBorderPointDictPair.1[cursor] else { return nil }

View File

@ -1,13 +1,13 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
/// The interface a language model must provide.
public protocol LangModelProtocol {
/// Returns the unigrams available for the given key array.
/// - Parameter keyArray: The keys to look up.
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram]
/// Tells whether the model has unigrams for the given key array.
/// - Parameter keyArray: The keys to look up.
func hasUnigramsFor(keyArray: [String]) -> Bool
}

View File

@ -1,12 +1,12 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
extension Megrez {
///
@frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable {
///
///
public var value: String
///
public var score: Double
@ -15,15 +15,17 @@ extension Megrez {
"(" + value.description + "," + String(score) + ")"
}
/// Creates a unigram.
/// - Parameters:
///   - value: The value of the unigram; defaults to an empty string.
///   - score: The score of the unigram; defaults to 0.
public init(value: String = "", score: Double = 0) {
self.value = value
self.score = score
}
///
/// - Parameter hasher:
public func hash(into hasher: inout Hasher) {
hasher.combine(value)
hasher.combine(score)

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)

View File

@ -1,5 +1,5 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
@ -9,9 +9,9 @@ import XCTest
@testable import Megrez
final class MegrezTests: XCTestCase {
func testSpan() throws {
func test01_Span() throws {
let langModel = SimpleLM(input: strSampleData)
let span = Megrez.Compositor.Span()
let span = Megrez.Compositor.SpanUnit()
let n1 = Megrez.Compositor.Node(
keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(keyArray: ["gao1"])
)
@ -50,11 +50,11 @@ final class MegrezTests: XCTestCase {
XCTAssertNil(span.nodeOf(length: Megrez.Compositor.maxSpanLength + 1))
}
func testRankedLangModel() throws {
func test02_RankedLangModel() throws {
class TestLM: LangModelProtocol {
func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray == ["foo"] }
func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray.joined() == "foo" }
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
keyArray == ["foo"]
keyArray.joined() == "foo"
? [.init(value: "middle", score: -5), .init(value: "highest", score: -2), .init(value: "lowest", score: -10)]
: .init()
}
@ -74,7 +74,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(unigrams[2].score, -10)
}
func testCompositor_BasicTests() throws {
func test03_Compositor_BasicTests() throws {
var compositor = Megrez.Compositor(with: MockLM())
XCTAssertEqual(compositor.separator, Megrez.Compositor.theSeparator)
XCTAssertEqual(compositor.cursor, 0)
@ -93,11 +93,11 @@ final class MegrezTests: XCTestCase {
compositor.dropKey(direction: .rear)
XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.length, 0)
XCTAssertEqual(compositor.spans.count, 0)
}
func testCompositor_InvalidOperations() throws {
func test04_Compositor_InvalidOperations() throws {
class TestLM: LangModelProtocol {
func hasUnigramsFor(keyArray: [String]) -> Bool { keyArray == ["foo"] }
func unigramsFor(keyArray: [String]) -> [Megrez.Unigram] {
@ -122,7 +122,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.length, 0)
}
func testCompositor_DeleteToTheFrontOfCursor() throws {
func test05_Compositor_DeleteToTheFrontOfCursor() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.insertKey("a")
compositor.cursor = 0
@ -132,13 +132,14 @@ final class MegrezTests: XCTestCase {
XCTAssertFalse(compositor.dropKey(direction: .rear))
XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.length, 1)
XCTAssertEqual(compositor.spans.count, 1)
XCTAssertTrue(compositor.dropKey(direction: .front))
XCTAssertEqual(compositor.cursor, 0)
XCTAssertEqual(compositor.length, 0)
XCTAssertEqual(compositor.spans.count, 0)
}
func testCompositor_MultipleSpans() throws {
func test06_Compositor_MultipleSpans() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";"
compositor.insertKey("a")
@ -158,7 +159,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[2].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
}
func testCompositor_SpanDeletionFromFront() throws {
func test07_Compositor_SpanDeletionFromFront() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";"
compositor.insertKey("a")
@ -176,7 +177,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "b")
}
func testCompositor_SpanDeletionFromMiddle() throws {
func test08_Compositor_SpanDeletionFromMiddle() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";"
compositor.insertKey("a")
@ -211,7 +212,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
}
func testCompositor_SpanDeletionFromRear() throws {
func test09_Compositor_SpanDeletionFromRear() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";"
compositor.insertKey("a")
@ -231,7 +232,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[1].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
}
func testCompositor_SpanInsertion() throws {
func test10_Compositor_SpanInsertion() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ";"
compositor.insertKey("a")
@ -259,7 +260,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[3].nodeOf(length: 1)?.keyArray.joined(separator: compositor.separator), "c")
}
func testCompositor_LongGridDeletion() throws {
func test11_Compositor_LongGridDeletion() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ""
compositor.insertKey("a")
@ -294,7 +295,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[8].nodeOf(length: 5)?.keyArray.joined(separator: compositor.separator), "jklmn")
}
func testCompositor_LongGridInsertion() throws {
func test12_Compositor_LongGridInsertion() throws {
var compositor = Megrez.Compositor(with: MockLM())
compositor.separator = ""
compositor.insertKey("a")
@ -331,7 +332,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.spans[8].nodeOf(length: 6)?.keyArray.joined(separator: compositor.separator), "hijklm")
}
func testCompositor_StressBench() throws {
func test13_Compositor_StressBench() throws {
NSLog("// Stress test preparation begins.")
var compositor = Megrez.Compositor(with: SimpleLM(input: strStressData))
for _ in 0..<1919 {
@ -344,7 +345,7 @@ final class MegrezTests: XCTestCase {
NSLog("// Stress test elapsed: \(timeElapsed)s.")
}
func testCompositor_WordSegmentation() throws {
func test14_Compositor_WordSegmentation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData, swapKeyValue: true))
compositor.separator = ""
for i in "高科技公司的年終獎金" {
@ -354,7 +355,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.joinedKeys(by: ""), ["高科技", "公司", "", "年終", "獎金"])
}
func testCompositor_InputTestAndCursorJump() throws {
func test15_Compositor_InputTestAndCursorJump() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = ""
compositor.insertKey("gao1")
@ -429,7 +430,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(compositor.dumpDOT, expectedDumpDOT)
}
func testCompositor_InputTest2() throws {
func test16_Compositor_InputTest2() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = ""
compositor.insertKey("gao1")
@ -443,7 +444,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高科技", "公司"])
}
func testCompositor_OverrideOverlappingNodes() throws {
func test17_Compositor_OverrideOverlappingNodes() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strSampleData))
compositor.separator = ""
compositor.insertKey("gao1")
@ -475,7 +476,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高科技"])
}
func testCompositor_OverrideReset() throws {
func test18_Compositor_OverrideReset() throws {
var compositor = Megrez.Compositor(
with: SimpleLM(input: strSampleData + "zhong1jiang3 終講 -11.0\n" + "jiang3jin1 槳襟 -11.0\n"))
compositor.separator = ""
@ -499,7 +500,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["年終", "槳襟"])
}
func testCompositor_CandidateDisambiguation() throws {
func test19_Compositor_CandidateDisambiguation() throws {
var compositor = Megrez.Compositor(with: SimpleLM(input: strEmojiSampleData))
compositor.separator = ""
compositor.insertKey("gao1")
@ -521,7 +522,7 @@ final class MegrezTests: XCTestCase {
XCTAssertEqual(result.values, ["高熱", "🔥", "危險"])
}
func testCompositor_updateUnigramData() throws {
func test20_Compositor_updateUnigramData() throws {
let theLM = SimpleLM(input: strSampleData)
var compositor = Megrez.Compositor(with: theLM)
compositor.separator = ""