vChewing-macOS/Packages/vChewing_Megrez/Sources/Megrez/1_Compositor.swift

312 lines
13 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation
public extension Megrez {
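/// A compositor: it stores the sequence of keys typed so far, the grid of spans
/// (candidate nodes) built over those keys, and the cursor / marker positions
/// used while editing that sequence.
///
/// Keys are added with `insertKey(_:)` and removed with `dropKey(direction:)`;
/// both keep the span grid in step with the key sequence and refresh the nodes
/// around the cursor through `update(updateExisting:)`.
///
/// - Remark: The original remark here refers to Markov HMM; the rest of its
///   wording did not survive in this copy (TODO: restore from upstream).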
struct Compositor {
/// Direction relative to the cursor. In `dropKey(direction:)`, `.rear` removes the key
/// just before the cursor (Backspace-like) and `.front` removes the key at the cursor.
/// In `jumpCursorBySpan(to:isMarker:)`, `.rear` moves toward 0 and `.front` toward `length`.
public enum TypingDirection { case front, rear }
/// What `resizeGrid(at:do:)` should do: `.expand` inserts an empty span,
/// `.shrink` removes one.
public enum ResizeBehavior { case expand, shrink }
/// Maximum number of keys a single node may cover. Defaults to 10; any value
/// assigned below 6 is raised to 6 by the observer.
public static var maxSpanLength: Int = 10 { didSet { maxSpanLength = max(6, maxSpanLength) } }
/// Type-wide separator string. Defaults to "-". It is overwritten whenever an
/// instance's `separator` is assigned after initialization.
public static var theSeparator: String = "-"
/// Cursor position within the key sequence. Every assignment is clamped to
/// `0 ... length`, and the marker is then moved to the same position.
public var cursor: Int = 0 {
didSet {
cursor = max(0, min(cursor, length))
marker = cursor
}
}
/// Marker position within the key sequence, clamped to `0 ... length` on every
/// assignment. Assigning `cursor` also resets the marker to the cursor.
public var marker: Int = 0 { didSet { marker = max(0, min(marker, length)) } }
/// Separator string of this instance; its initial value is taken from `theSeparator`.
/// Assigning it (outside of `init`) copies the new value into `theSeparator`.
/// `insertKey(_:)` refuses a key that equals this string.
public var separator = theSeparator {
didSet {
Self.theSeparator = separator
}
}
/// The nodes of the most recent walk. Nothing in this part of the file fills it;
/// presumably a walk routine defined elsewhere does (TODO confirm). It is read by
/// `jumpCursorBySpan(to:isMarker:)` and emptied by `clear()`.
public var walkedNodes: [Node] = []
/// Number of keys currently held.
/// - Remark: This is expected to equal `spans.count` as well, since keys and spans
///   are inserted and removed together.
public var length: Int { keys.count }
/// `true` when there are neither spans nor keys.
public var isEmpty: Bool { spans.isEmpty && keys.isEmpty }
/// The key sequence. Read-only from outside the type.
public private(set) var keys = [String]()
/// The span grid, one `SpanUnit` per key position. Read-only from outside the type.
public private(set) var spans = [SpanUnit]()
/// The language model in use, held as a `LangModelRanked`.
/// Replacing it clears the compositor via `clear()`.
public var langModel: LangModelRanked {
didSet { clear() }
}
/// Creates an empty compositor backed by the given language model.
/// - Parameters:
///   - langModel: The language model to consult; it is wrapped in a `LangModelRanked`.
///   - separator: The separator string stored on this instance. Defaults to `"-"`.
/// - Note: Swift does not run property observers for assignments made inside an
///   initializer, so `theSeparator` is NOT updated here; it only follows later
///   assignments to `separator`.
public init(with langModel: LangModelProtocol, separator: String = "-") {
  let rankedModel = LangModelRanked(withLM: langModel)
  self.langModel = rankedModel
  self.separator = separator
}
/// Resets the compositor to its empty state.
///
/// The cursor and the marker go back to 0, and the key sequence, the span grid
/// and the walked nodes are emptied. The language model and the separator are
/// left as they are.
public mutating func clear() {
  // Positions first, storage afterwards.
  cursor = 0
  marker = 0
  keys = []
  spans = []
  walkedNodes = []
}
/// Inserts a key at the cursor position and moves the cursor past it.
///
/// The key is refused up front when it is empty, equals `separator`, or the
/// language model reports no unigrams for it. If the key passes that check but
/// `update()` still creates no node, the insertion is rolled back: both `keys`
/// and `spans` are restored so that they stay the same length.
/// - Parameter key: The key to insert.
/// - Returns: `true` when the key was inserted; `false` when it was refused or
///   the insertion was rolled back.
@discardableResult public mutating func insertKey(_ key: String) -> Bool {
  guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(keyArray: [key]) else { return false }
  let gridBackup = spans
  keys.insert(key, at: cursor)
  resizeGrid(at: cursor, do: .expand)
  let nodesInserted = update()
  // Safety net for an inaccurate `hasUnigramsFor()` answer: undo everything.
  // The key must be taken out again as well; restoring only `spans` would
  // leave `keys` one element longer than the grid.
  if nodesInserted == 0 {
    keys.remove(at: cursor)
    spans = gridBackup
    return false
  }
  cursor += 1 // Only after update(), which derives its range from the cursor.
  return true
}
/// Removes one key next to the cursor and shrinks the span grid accordingly.
///
/// `.rear` behaves like Backspace: the key just before the cursor is removed and
/// the cursor steps back by one. `.front` behaves like forward Delete: the key at
/// the cursor is removed and the cursor stays where it is.
/// - Parameter direction: Which side of the cursor to delete from.
/// - Returns: `false` when there is nothing to delete on that side (cursor at 0
///   for `.rear`, cursor at the end of the keys for `.front`); otherwise `true`.
@discardableResult public mutating func dropKey(direction: TypingDirection) -> Bool {
  let stepBack: Int
  let deadEnd: Int
  switch direction {
  case .rear:
    stepBack = 1
    deadEnd = 0
  case .front:
    stepBack = 0
    deadEnd = keys.count
  }
  if cursor == deadEnd { return false }
  keys.remove(at: cursor - stepBack)
  // Assigned even when `stepBack` is 0: the cursor setter re-clamps the value
  // and moves the marker onto the cursor.
  cursor -= stepBack
  resizeGrid(at: cursor, do: .shrink)
  update()
  return true
}
/// Moves the cursor (or the marker) to the neighbouring border of a walked node.
///
/// Going `.front` lands on the front border of the region the position is in.
/// Going `.rear` lands on that region's rear border, or, when the position is
/// already on it, on the rear border of the previous region.
///
/// The region of a position is looked up through `walkedNodes.cursorRegionMap`,
/// which is defined outside this section (presumably position -> index of the
/// walked node covering it; TODO confirm).
/// - Parameters:
///   - direction: `.front` moves toward `length`, `.rear` toward 0.
///   - isMarker: When `true` the marker is read and moved instead of the cursor.
///     Defaults to `false`.
/// - Returns: `false` when the position is already at the end being moved toward,
///   or when the region map has no entry for it; otherwise `true`.
@discardableResult public mutating func jumpCursorBySpan(to direction: TypingDirection, isMarker: Bool = false)
  -> Bool
{
  var target = isMarker ? marker : cursor
  switch direction {
  case .front:
    if target == length { return false }
  case .rear:
    if target == 0 { return false }
  }
  guard let currentRegion = walkedNodes.cursorRegionMap[target] else { return false }
  let aRegionForward = max(currentRegion - 1, 0)
  let currentRegionBorderRear: Int = walkedNodes[0 ..< currentRegion].map(\.spanLength).reduce(0, +)
  // `walkedNodes[currentRegion]` only exists while the index is below the count.
  // The previous check (`currentRegion > walkedNodes.count`) let the index equal
  // to the count through to `walkedNodes[0 ... currentRegion]`, which is out of
  // range; anything larger would already have trapped on the prefix slice above.
  let regionHasNode = currentRegion < walkedNodes.count
  switch direction {
  case .front:
    // Front border of the current region: its rear border plus its own length.
    // Without a node for this region, fall back to the end of the keys.
    target = regionHasNode
      ? currentRegionBorderRear + walkedNodes[currentRegion].spanLength
      : keys.count
  case .rear:
    if target == currentRegionBorderRear {
      // Already on this region's rear border: go to the previous region's.
      target = walkedNodes[0 ..< aRegionForward].map(\.spanLength).reduce(0, +)
    } else {
      target = currentRegionBorderRear
    }
  }
  if isMarker {
    marker = target
  } else {
    cursor = target
  }
  return true
}
/// The span grid rendered as a GraphViz DOT digraph (left-to-right layout).
///
/// Every node found in the grid is listed by its `value`. Nodes in the first
/// span get an edge from `BOS`; a node gets an edge to each node of the span it
/// ends on, and a node that ends exactly at the end of the grid gets an edge to `EOS`.
public var dumpDOT: String {
  var dot = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
  for (position, span) in spans.enumerated() {
    for nodeLength in 0 ... span.maxLength {
      guard let node = span.nodeOf(length: nodeLength) else { continue }
      if position == 0 {
        dot += "BOS -> \(node.value);\n"
      }
      dot += "\(node.value);\n"
      // Index of the span where this node ends.
      let landing = position + nodeLength
      if landing < spans.count {
        let nextSpan = spans[landing]
        for nextLength in 0 ... nextSpan.maxLength {
          guard let nextNode = nextSpan.nodeOf(length: nextLength) else { continue }
          dot += "\(node.value) -> \(nextNode.value);\n"
        }
      }
      if landing == spans.count {
        dot += "\(node.value) -> EOS;\n"
      }
    }
  }
  dot += "EOS;\n}\n"
  return dot
}
}
}
// MARK: - Internal Methods (Maybe Public)
extension Megrez.Compositor {
/// Grows or shrinks the span grid by one span at the given position, then drops
/// the nodes that the change has broken.
/// - Parameters:
///   - location: Position in the grid; values outside `0 ... spans.count` are clamped.
///   - action: `.expand` inserts an empty span at the position; `.shrink` removes
///     the span at the position (nothing happens when the position is `spans.count`).
mutating func resizeGrid(at location: Int, do action: ResizeBehavior) {
  let slot = min(max(location, 0), spans.count) // Clamp into the valid range.
  switch action {
  case .expand:
    spans.insert(SpanUnit(), at: slot)
    // NOTE(review): after the insert, `slot` is at most `spans.count - 1`, so only
    // the `slot == 0` half of this test can ever be true.
    guard slot != 0, slot != spans.count else { return }
  case .shrink:
    guard slot != spans.count else { return }
    spans.remove(at: slot)
  }
  dropWreckedNodes(at: slot)
}
/// Removes the nodes that were cut through by a `resizeGrid(at:do:)` at `location`.
///
/// A node stored in span `i` with length `n` covers positions `i ..< i + n`.
/// After a span has been inserted or removed at `location`, any node that starts
/// before `location` and is long enough to reach past it no longer matches the
/// keys beneath it. For each span `i` in the `maxSpanLength - 1` positions before
/// `location`, the span is therefore asked to drop its nodes of length
/// `location - i + 1` or more.
///
/// Example with `location == 2`:
/// ```
/// span 0: nodes of length >= 3 are dropped
/// span 1: nodes of length >= 2 are dropped
/// ```
/// - Parameter location: Position of the change; values outside
///   `0 ... spans.count` are clamped.
mutating func dropWreckedNodes(at location: Int) {
  if spans.isEmpty { return }
  let boundary = min(max(location, 0), spans.count) // Clamp into the valid range.
  let reachBack = Megrez.Compositor.maxSpanLength - 1
  let firstAffected = max(0, boundary - reachBack)
  guard firstAffected <= boundary else { return }
  // Shortest node length that still crosses `boundary`, starting from the first
  // affected span; it shrinks by one for every span closer to the boundary.
  var crossingLength = boundary - firstAffected + 1
  for index in firstAffected ..< boundary {
    spans[index].dropNodesOfOrBeyond(length: crossingLength)
    crossingLength -= 1
  }
}
/// Returns the keys that lie in the given index range.
/// - Parameter range: Half-open range of indices into `keys`.
/// - Returns: The keys in that range, or an empty array when the range does not
///   fit inside `0 ..< keys.count`.
func getJoinedKeyArray(range: Range<Int>) -> [String] {
  // Bounds are compared by hand rather than with a range-containment call.
  let fitsInsideKeys = range.lowerBound >= 0 && range.upperBound <= keys.count
  return fitsInsideKeys ? Array(keys[range]) : []
}
/// Looks up the node at a span position that has the given length and key array.
/// - Parameters:
///   - location: Span index; values outside `0 ..< spans.count` are clamped.
///   - length: Length of the wanted node.
///   - keyArray: The keys the node must carry.
/// - Returns: The matching node, or `nil` when the grid is empty, the span has no
///   node of that length, or that node's keys differ from `keyArray`.
func getNode(at location: Int, length: Int, keyArray: [String]) -> Node? {
  // With an empty grid the clamp below would yield index 0, which does not exist.
  guard !spans.isEmpty else { return nil }
  let location = max(min(location, spans.count - 1), 0) // Clamp into the valid range.
  guard let node = spans[location].nodeOf(length: length), node.keyArray == keyArray else { return nil }
  return node
}
/// Builds (and optionally refreshes) the nodes around the cursor from the language model.
///
/// Every key run of length `1 ... maxSpanLength` that starts within
/// `maxSpanLength` positions of the cursor is considered. A run without a node
/// gets one if the language model has unigrams for it. A run that already has a
/// node is skipped unless `updateExisting` is set.
/// - Parameter updateExisting: When `true`, existing nodes are re-synced with the
///   language model's current unigrams. An existing node whose unigrams have become
///   empty is removed, except single-key nodes, which are left untouched.
///   Defaults to `false`.
/// - Returns: The number of nodes added, re-synced or removed; 0 when nothing changed.
@discardableResult public mutating func update(updateExisting: Bool = false) -> Int {
  let spanLimit = Megrez.Compositor.maxSpanLength
  let windowStart = max(0, cursor - spanLimit)
  let windowEnd = min(cursor + spanLimit, keys.count)
  var changeCount = 0
  for position in windowStart ..< windowEnd {
    let longestRun = min(spanLimit, windowEnd - position)
    for runLength in 1 ... longestRun {
      let runKeys = getJoinedKeyArray(range: position ..< (position + runLength))
      guard let existingNode = getNode(at: position, length: runLength, keyArray: runKeys) else {
        // No node yet: create one if the language model knows this run.
        let unigrams = langModel.unigramsFor(keyArray: runKeys)
        if unigrams.isEmpty { continue }
        spans[position].append(
          node: .init(keyArray: runKeys, spanLength: runLength, unigrams: unigrams)
        )
        changeCount += 1
        continue
      }
      // A node is already there; it is only touched on request.
      guard updateExisting else { continue }
      let unigrams = langModel.unigramsFor(keyArray: runKeys)
      if unigrams.isEmpty {
        // Single-key nodes are kept (and not counted) even without unigrams.
        if existingNode.keyArray.count == 1 { continue }
        spans[position].nodes.removeAll { $0 == existingNode }
      } else {
        existingNode.syncingUnigrams(from: unigrams)
      }
      changeCount += 1
    }
  }
  return changeCount
}
}