vChewing-macOS/Packages/vChewing_Megrez/Sources/Megrez/6_Node.swift

284 lines
13 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Swiftified and further development by (c) 2022 and onwards The vChewing Project (MIT License).
// Was initially rebranded from (c) Lukhnos Liu's C++ library "Gramambular 2" (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
import Foundation
public extension Megrez.Compositor {
  /// A node of the compositor.
  ///
  /// A node bundles a key array, a span length, and the unigrams available for those
  /// keys, together with the state describing which unigram is currently selected and
  /// whether that selection is overridden. For instance, a word read with two keys is
  /// expected to carry a span length of 2 (this class does not enforce that relation).
  class Node: Equatable, Hashable {
    /// The ways in which the current selection of a node can be overridden.
    /// - withNoOverrides: No override; `score` reports the selected unigram's own score.
    /// - withTopUnigramScore: The selected unigram's value is used, but `score` reports
    ///   the score of the first unigram. For example, with the unigrams
    ///   [("a", -114), ("b", -514), ("c", -1919)], selecting "c" makes the node
    ///   yield ("c", -114).
    /// - withHighScore: `score` reports `overridingScore`.
    public enum OverrideType: Int {
      case withNoOverrides = 0
      case withTopUnigramScore = 1
      case withHighScore = 2
    }

    /// The score reported by `score` while the node is overridden with `.withHighScore`.
    public var overridingScore: Double = 114_514
    // public var key: String { keyArray.joined(separator: Megrez.Compositor.theSeparator) }
    /// The keys held by this node.
    public private(set) var keyArray: [String]
    /// The span length of this node. Never negative: the initializer clamps it to 0.
    public private(set) var spanLength: Int
    /// The unigrams available in this node.
    public private(set) var unigrams: [Megrez.Unigram]
    /// The override type currently in effect.
    public private(set) var currentOverrideType: Node.OverrideType
    /// Index of the currently selected unigram. Every assignment is clamped into the
    /// valid index range of `unigrams` (or to 0 when there are no unigrams).
    public private(set) var currentUnigramIndex: Int = 0 {
      didSet { currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0) }
    }

    /// The key array paired with the currently selected value.
    public var currentPair: Megrez.Compositor.KeyValuePaired { .init(keyArray: keyArray, value: value) }

    /// Feeds the hasher with exactly the properties that `==` compares.
    ///
    /// `currentUnigramIndex` is deliberately left out: `==` does not look at it, and
    /// hashing a property that equality ignores would let two equal nodes produce
    /// different hashes, which `Hashable` forbids.
    /// - Parameter hasher: The hasher to feed.
    public func hash(into hasher: inout Hasher) {
      hasher.combine(keyArray)
      hasher.combine(spanLength)
      hasher.combine(unigrams)
      hasher.combine(currentOverrideType)
    }

    /// Two nodes are equal when their key arrays, span lengths, unigrams and override
    /// types all match.
    public static func == (lhs: Node, rhs: Node) -> Bool {
      lhs.keyArray == rhs.keyArray && lhs.spanLength == rhs.spanLength
        && lhs.unigrams == rhs.unigrams && lhs.currentOverrideType == rhs.currentOverrideType
    }

    /// Creates a node with no override applied and the first unigram selected.
    /// - Parameters:
    ///   - keyArray: The keys held by the node.
    ///   - spanLength: The span length; negative values are clamped to 0.
    ///   - unigrams: The unigrams available in the node.
    public init(keyArray: [String] = [], spanLength: Int = 0, unigrams: [Megrez.Unigram] = []) {
      self.keyArray = keyArray
      self.spanLength = max(spanLength, 0)
      self.unigrams = unigrams
      currentOverrideType = .withNoOverrides
    }

    /// Whether the number of keys differs from the number of characters in `value`.
    public var isReadingMismatched: Bool { keyArray.count != value.count }
    /// Whether any override is currently in effect.
    public var isOverridden: Bool { currentOverrideType != .withNoOverrides }
    /// The currently selected unigram, or a default-initialized unigram when the node
    /// has no unigrams at all.
    public var currentUnigram: Megrez.Unigram {
      unigrams.isEmpty ? .init() : unigrams[currentUnigramIndex]
    }

    /// The value of the currently selected unigram.
    public var value: String { currentUnigram.value }

    /// The score of this node under the current override type; 0 when the node has
    /// no unigrams.
    public var score: Double {
      guard !unigrams.isEmpty else { return 0 }
      switch currentOverrideType {
      case .withHighScore: return overridingScore
      case .withTopUnigramScore: return unigrams[0].score
      case .withNoOverrides: return currentUnigram.score
      }
    }

    /// Selects the first unigram again and removes any override.
    public func reset() {
      currentUnigramIndex = 0
      currentOverrideType = .withNoOverrides
    }

    /// Joins the keys of this node into one string.
    /// - Parameter separator: The separator to insert; defaults to `Compositor.theSeparator`.
    /// - Returns: The joined key string.
    public func joinedKey(by separator: String = Megrez.Compositor.theSeparator) -> String {
      keyArray.joined(separator: separator)
    }

    /// Replaces the unigrams of this node. The selection index is kept (clamped to the
    /// new range); if that changes the currently selected value, the node is reset.
    /// - Parameter source: The new unigrams. May be empty.
    public func syncingUnigrams(from source: [Megrez.Unigram]) {
      // Read through `currentUnigram` instead of subscripting `unigrams` directly, so
      // that an empty unigram list (before or after the replacement) cannot trap.
      let oldCurrentValue = currentUnigram.value
      unigrams = source
      currentUnigramIndex = max(min(unigrams.count - 1, currentUnigramIndex), 0)
      let newCurrentValue = currentUnigram.value
      if oldCurrentValue != newCurrentValue { reset() }
    }

    /// Selects the first unigram whose value matches and applies the given override type.
    /// - Parameters:
    ///   - value: The unigram value to look for.
    ///   - type: The override type to apply. `.withNoOverrides` is rejected.
    /// - Returns: `true` when a matching unigram was selected; `false` when `type` is
    ///   `.withNoOverrides` or no unigram carries `value` (the node is left untouched).
    public func selectOverrideUnigram(value: String, type: Node.OverrideType) -> Bool {
      guard type != .withNoOverrides,
            let matchedIndex = unigrams.firstIndex(where: { $0.value == value })
      else { return false }
      currentUnigramIndex = matchedIndex
      currentOverrideType = type
      return true
    }
  }
}
public extension Megrez.Compositor {
  /// A node paired with a span index. Referred to as `NodeInSpan` in Gramambular 2.
  struct NodeAnchor: Hashable {
    /// The anchored node.
    let node: Megrez.Compositor.Node
    /// The span index stored together with the node.
    let spanIndex: Int

    /// The span length, forwarded from the node.
    var spanLength: Int {
      return node.spanLength
    }

    /// The unigrams, forwarded from the node.
    var unigrams: [Megrez.Unigram] {
      return node.unigrams
    }

    /// The key array, forwarded from the node.
    var keyArray: [String] {
      return node.keyArray
    }

    /// The currently selected value, forwarded from the node.
    var value: String {
      return node.value
    }

    /// Feeds the node first and then the span index into the hasher.
    /// - Parameter hasher: The hasher to feed.
    public func hash(into hasher: inout Hasher) {
      hasher.combine(node)
      hasher.combine(spanIndex)
    }
  }
}
// MARK: - Array Extensions.
public extension Array where Element == Megrez.Compositor.Node {
  /// The currently selected values of all nodes, in order.
  var values: [String] { map(\.value) }

  /// The keys of each node joined into one string per node.
  /// - Parameter separator: The separator to insert; defaults to `Compositor.theSeparator`.
  /// - Returns: One joined key string per node, in order.
  func joinedKeys(by separator: String = Megrez.Compositor.theSeparator) -> [String] {
    map { $0.keyArray.joined(separator: separator) }
  }

  /// The key arrays of all nodes, in order.
  var keyArrays: [[String]] { map(\.keyArray) }

  /// Builds two lookup tables describing where node borders fall among cursor positions.
  ///
  /// - `regionCursorMap` maps a node index to the cursor position at which that node's
  ///   keys begin; the extra entry `count` maps to the total number of keys.
  /// - `cursorRegionMap` maps a cursor position to the index of the node whose keys
  ///   cover it; it also holds the extra entries `-1 -> 0` and
  ///   `total number of keys -> count`.
  ///
  /// Both tables are rebuilt on every access, so callers should read this once and
  /// reuse the result.
  private var nodeBorderPointDictPair: (regionCursorMap: [Int: Int], cursorRegionMap: [Int: Int]) {
    var nodeToCursor = [Int: Int]()
    var cursorToNode: [Int: Int] = [-1: 0] // Fallback entry for the position before the start.
    var cursorCounter = 0
    for (nodeCounter, node) in enumerated() {
      nodeToCursor[nodeCounter] = cursorCounter
      for _ in node.keyArray {
        cursorToNode[cursorCounter] = nodeCounter
        cursorCounter += 1
      }
    }
    nodeToCursor[count] = cursorCounter
    cursorToNode[cursorCounter] = count
    return (nodeToCursor, cursorToNode)
  }

  /// Maps each cursor position to the index of the node covering it.
  var cursorRegionMap: [Int: Int] { nodeBorderPointDictPair.cursorRegionMap }

  /// The total number of keys across all nodes.
  var totalKeyCount: Int { reduce(0) { $0 + $1.keyArray.count } }

  /// Returns the range of cursor positions covered by the node at the given cursor.
  ///
  /// An empty array yields `0 ..< 0`. A cursor at or beyond the total key count yields
  /// the range of the last node. A negative cursor is treated as 0.
  /// - Parameter cursor: The cursor position to look up.
  /// - Returns: The half-open range of cursor positions of the matching node.
  func contextRange(ofGivenCursor cursor: Int) -> Range<Int> {
    guard let lastNode = last else { return 0 ..< 0 }
    // Summing the keys is O(n); do it once.
    let keyTotal = totalKeyCount
    if cursor >= keyTotal {
      return (keyTotal - lastNode.keyArray.count) ..< keyTotal
    }
    let cursor = Swift.max(0, cursor)
    // Building the border maps is O(n) as well; build them once and reuse them.
    let borderMaps = nodeBorderPointDictPair
    guard let rearNodeID = borderMaps.cursorRegionMap[cursor],
          let rearIndex = borderMaps.regionCursorMap[rearNodeID],
          let frontIndex = borderMaps.regionCursorMap[rearNodeID + 1]
    else { return cursor ..< cursor } // Empty range at the cursor when a lookup fails.
    return rearIndex ..< frontIndex
  }

  /// Finds the node covering the given cursor position.
  ///
  /// The cursor is clamped into `0 ... totalKeyCount - 1` before the lookup.
  /// - Parameters:
  ///   - cursor: The cursor position to look up.
  ///   - outCursorPastNode: Receives the upper bound of the found node's cursor range.
  ///     Left untouched when the array is empty.
  /// - Returns: The node covering the cursor, or `nil` when there is none.
  func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
    guard !isEmpty else { return nil }
    let cursor = Swift.max(0, Swift.min(cursor, totalKeyCount - 1))
    outCursorPastNode = contextRange(ofGivenCursor: cursor).upperBound
    guard let rearNodeID = cursorRegionMap[cursor] else { return nil }
    return indices.contains(rearNodeID) ? self[rearNodeID] : nil
  }

  /// Finds the node covering the given cursor position.
  /// - Parameter cursor: The cursor position to look up.
  /// - Returns: The node covering the cursor, or `nil` when there is none.
  func findNode(at cursor: Int) -> Megrez.Compositor.Node? {
    var discardedUpperBound = 0
    return findNode(at: cursor, target: &discardedUpperBound)
  }

  /// Key/value pairs broken down per key, as plain tuples rather than Megrez's
  /// `KeyValuePaired` type.
  ///
  /// A node whose key count matches the character count of its value contributes one
  /// pair per key, each paired with the character at the same position. A mismatched
  /// node contributes a single pair holding its joined key (separators replaced by
  /// tabs) and its whole value.
  var smashedPairs: [(key: String, value: String)] {
    var pairs = [(key: String, value: String)]()
    let separator = Megrez.Compositor.theSeparator
    for node in self {
      guard !node.isReadingMismatched else {
        var joined = node.joinedKey()
        if !separator.isEmpty, joined != separator, joined.contains(separator) {
          joined = joined.replacingOccurrences(of: separator, with: "\t")
        }
        pairs.append((key: joined, value: node.value))
        continue
      }
      // Counts match here, so zipping pairs every key with exactly one character.
      for (key, character) in zip(node.keyArray, node.value) {
        pairs.append((key: key, value: character.description))
      }
    }
    return pairs
  }
}