UOM // Implementing new function sets from upstream.

- Keeping previous methods for generating keys.
This commit is contained in:
ShikiSuen 2022-08-08 09:39:22 +08:00
parent 7ba2983d1e
commit e007ada6a5
3 changed files with 269 additions and 204 deletions

View File

@ -126,31 +126,26 @@ public class KeyHandler {
return arrResult
}
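/// Overrides the node at the current candidate cursor with the given candidate, then re-walks the compositor.
///
/// - Parameters:
///   - candidate: The (key, value) pair used to override the node.
///   - respectCursorPushing: Defaults to true. NOTE(review): its effect is not visible in this excerpt — confirm.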
func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) {
let actualCursor = actualCandidateCursor
let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1)
if !compositor.overrideCandidate(theCandidate, at: actualCursor) { return }
if !compositor.overrideCandidate(theCandidate, at: actualCursor, overrideType: .withHighScore) { return }
let previousWalk = compositor.walkedNodes
//
walk()
let currentWalk = compositor.walkedNodes
// 使
var accumulatedCursor = 0
var currentNode: Megrez.Compositor.Node?
for node in compositor.walkedNodes {
accumulatedCursor += node.spanLength
if accumulatedCursor > actualCursor {
currentNode = node
break
}
}
let currentNode = currentWalk.findNode(at: actualCandidateCursor, target: &accumulatedCursor)
guard let currentNode = currentNode else { return }
if currentNode.currentUnigram.score > -12 {
if currentNode.currentUnigram.score > -12, mgrPrefs.fetchSuggestionsFromUserOverrideModel {
IME.prtDebugIntel("UOM: Start Observation.")
// 使
//
@ -158,9 +153,9 @@ public class KeyHandler {
mgrPrefs.failureFlagForUOMObservation = true
//
//
currentUOM.observe(
walkedNodes: compositor.walkedNodes, cursorIndex: actualCursor, candidate: theCandidate.value,
timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() }
currentUOM.performObservation(
walkedBefore: previousWalk, walkedAfter: currentWalk, cursor: actualCandidateCursor,
timestamp: Date().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() }
)
//
mgrPrefs.failureFlagForUOMObservation = false
@ -196,7 +191,7 @@ public class KeyHandler {
return arrCandidates.map { ($0.key, $0.value) }
}
let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestedCandidates()
let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestionsFromUOM(apply: false)
let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map {
Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value)
}
@ -206,32 +201,40 @@ public class KeyHandler {
return arrCandidates.map { ($0.key, $0.value) }
}
///
func fetchSuggestedCandidates() -> [(String, Megrez.Unigram)] {
currentUOM.suggest(
walkedNodes: compositor.walkedNodes, cursorIndex: compositor.cursor,
timestamp: NSDate().timeIntervalSince1970
).stableSort { $0.1.score > $1.1.score }
}
///
func fetchAndApplySuggestionsFromUserOverrideModel() {
@discardableResult func fetchSuggestionsFromUOM(apply: Bool) -> [(String, Megrez.Unigram)] {
var arrResult = [(String, Megrez.Unigram)]()
///
if mgrPrefs.useSCPCTypingMode { return }
if mgrPrefs.useSCPCTypingMode { return arrResult }
///
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return }
///
let overrideValue = fetchSuggestedCandidates().first?.1.value ?? ""
///
if !overrideValue.isEmpty {
IME.prtDebugIntel(
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
// TODO:
compositor.overrideCandidateLiteral(overrideValue, at: actualCandidateCursor, overrideType: .withTopUnigramScore)
} else {
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.")
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return arrResult }
///
let suggestion = currentUOM.fetchSuggestion(
currentWalk: compositor.walkedNodes, cursor: actualCandidateCursor, timestamp: Date().timeIntervalSince1970
)
arrResult.append(contentsOf: suggestion.candidates)
if apply {
///
if !suggestion.isEmpty, let newestSuggestedCandidate = suggestion.candidates.last {
let overrideBehavior: Megrez.Compositor.Node.OverrideType =
suggestion.forceHighScoreOverride ? .withHighScore : .withTopUnigramScore
let suggestedPair: Megrez.Compositor.Candidate = .init(
key: newestSuggestedCandidate.0, value: newestSuggestedCandidate.1.value
)
IME.prtDebugIntel(
"UOM: Suggestion retrieved, overriding the node score of the selected candidate: \(suggestedPair.toNGramKey)")
if !compositor.overrideCandidate(suggestedPair, at: actualCandidateCursor, overrideType: overrideBehavior) {
compositor.overrideCandidateLiteral(
newestSuggestedCandidate.1.value, at: actualCandidateCursor, overrideType: overrideBehavior
)
}
walk()
} else {
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.")
}
}
arrResult = arrResult.stableSort { $0.1.score > $1.1.score }
return arrResult
}
// MARK: - Extracted methods and functions (Tekkon).

View File

@ -87,7 +87,7 @@ extension KeyHandler {
walk()
//
fetchAndApplySuggestionsFromUserOverrideModel()
fetchSuggestionsFromUOM(apply: true)
//
composer.clear()

View File

@ -26,174 +26,46 @@ extension vChewing {
mutDecayExponent = log(0.5) / decayConstant
}
public func observe(
walkedNodes: [Megrez.Compositor.Node],
cursorIndex: Int,
candidate: String,
timestamp: Double,
saveCallback: @escaping () -> Void
public func performObservation(
walkedBefore: [Megrez.Compositor.Node], walkedAfter: [Megrez.Compositor.Node],
cursor: Int, timestamp: Double, saveCallback: @escaping () -> Void
) {
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
//
guard !walkedAfter.isEmpty, !walkedBefore.isEmpty else { return }
guard walkedBefore.totalReadingsCount == walkedAfter.totalReadingsCount else { return }
//
var actualCursor = 0
guard let currentNode = walkedAfter.findNode(at: cursor, target: &actualCursor) else { return }
// 使
guard currentNode.spanLength <= 3 else { return }
//
guard actualCursor > 0 else { return } //
let currentNodeIndex = actualCursor
actualCursor -= 1
var prevNodeIndex = 0
guard let prevNode = walkedBefore.findNode(at: actualCursor, target: &prevNodeIndex) else { return }
let forceHighScoreOverride: Bool = currentNode.spanLength > prevNode.spanLength
let breakingUp = currentNode.spanLength == 1 && prevNode.spanLength > 1
let targetNodeIndex = breakingUp ? currentNodeIndex : prevNodeIndex
let key: String = vChewing.LMUserOverride.formObservationKey(
walkedNodes: walkedAfter, headIndex: targetNodeIndex
)
guard !key.isEmpty else { return }
guard mutLRUMap[key] != nil else {
var observation: Observation = .init()
observation.update(candidate: candidate, timestamp: timestamp)
let koPair = KeyObservationPair(key: key, observation: observation)
// key key key
// Swift
mutLRUMap.removeValue(forKey: key)
mutLRUMap[key] = koPair
mutLRUList.insert(koPair, at: 0)
if mutLRUList.count > mutCapacity {
mutLRUMap.removeValue(forKey: mutLRUList[mutLRUList.endIndex].key)
mutLRUList.removeLast()
}
IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)")
saveCallback()
return
}
//
if var theNeta = mutLRUMap[key] {
_ = suggest(
walkedNodes: walkedNodes, cursorIndex: cursorIndex, timestamp: timestamp,
decayCallback: {
theNeta.observation.update(candidate: candidate, timestamp: timestamp)
self.mutLRUList.insert(theNeta, at: 0)
self.mutLRUMap[key] = theNeta
IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)")
saveCallback()
}
)
}
doObservation(
key: key, candidate: currentNode.currentUnigram.value, timestamp: timestamp,
forceHighScoreOverride: forceHighScoreOverride, saveCallback: { saveCallback() }
)
}
public func suggest(
walkedNodes: [Megrez.Compositor.Node],
cursorIndex: Int,
timestamp: Double,
decayCallback: @escaping () -> Void = {}
) -> [(String, Megrez.Unigram)] {
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
guard !key.isEmpty else {
IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.")
return .init()
}
let currentReadingKey = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex, readingOnly: true)
guard let koPair = mutLRUMap[key] else {
IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).")
return .init()
}
let observation = koPair.observation
var arrResults = [(String, Megrez.Unigram)]()
var currentHighScore = 0.0
for overrideNeta in Array(observation.overrides) {
let override: Override = overrideNeta.value
let overrideScore: Double = getScore(
eventCount: override.count,
totalCount: observation.count,
eventTimestamp: override.timestamp,
timestamp: timestamp,
lambda: mutDecayExponent
)
if (0...currentHighScore).contains(overrideScore) { continue }
let overrideDetectionScore: Double = getScore(
eventCount: override.count,
totalCount: observation.count,
eventTimestamp: override.timestamp,
timestamp: timestamp,
lambda: mutDecayExponent * 2
)
if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() }
let newUnigram = Megrez.Unigram(value: overrideNeta.key, score: overrideScore)
arrResults.insert((currentReadingKey, newUnigram), at: 0)
currentHighScore = overrideScore
}
if arrResults.isEmpty {
IME.prtDebugIntel("UOM: No usable suggestions in the result for key: \(key).")
}
return arrResults
}
private func getScore(
eventCount: Int,
totalCount: Int,
eventTimestamp: Double,
timestamp: Double,
lambda: Double
) -> Double {
let decay = exp((timestamp - eventTimestamp) * lambda)
if decay < kDecayThreshold { return 0.0 }
let prob = Double(eventCount) / Double(totalCount)
return prob * decay
}
func convertKeyFrom(
walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, readingOnly: Bool = false
) -> String {
let whiteList = "你他妳她祢衪它牠再在"
var arrNodes: [Megrez.Compositor.Node] = []
var intLength = 0
for theNodeAnchor in walkedNodes {
arrNodes.append(theNodeAnchor)
intLength += theNodeAnchor.spanLength
if intLength >= cursorIndex {
break
}
}
if arrNodes.isEmpty { return "" }
arrNodes = Array(arrNodes.reversed())
let kvCurrent = arrNodes[0].currentPair
guard !kvCurrent.key.contains("_") else {
return ""
}
//
if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" }
//
let strCurrent = kvCurrent.key
var kvPrevious = Megrez.KeyValuePaired()
var kvAnterior = Megrez.KeyValuePaired()
var readingStack = ""
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String {
// kvCurrent
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
}
if arrNodes.count >= 2,
!kvPrevious.key.contains("_"),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{
kvPrevious = arrNodes[1].currentPair
readingStack = kvPrevious.key + readingStack
}
if arrNodes.count >= 3,
!kvAnterior.key.contains("_"),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{
kvAnterior = arrNodes[2].currentPair
readingStack = kvAnterior.key + readingStack
}
return result
public func fetchSuggestion(
currentWalk: [Megrez.Compositor.Node], cursor: Int, timestamp: Double
) -> Suggestion {
var headIndex = 0
guard let nodeIter = currentWalk.findNode(at: cursor, target: &headIndex) else { return .init() }
let key = vChewing.LMUserOverride.formObservationKey(walkedNodes: currentWalk, headIndex: headIndex)
return getSuggestion(key: key, timestamp: timestamp, headReading: nodeIter.key)
}
}
}
@ -208,6 +80,7 @@ extension vChewing.LMUserOverride {
struct Override: Hashable, Encodable, Decodable {
var count: Int = 0
var timestamp: Double = 0.0
var forceHighScoreOverride = false
static func == (lhs: Override, rhs: Override) -> Bool {
lhs.count == rhs.count && lhs.timestamp == rhs.timestamp
}
@ -242,11 +115,12 @@ extension vChewing.LMUserOverride {
hasher.combine(overrides)
}
mutating func update(candidate: String, timestamp: Double) {
mutating func update(candidate: String, timestamp: Double, forceHighScoreOverride: Bool = false) {
count += 1
if overrides.keys.contains(candidate) {
overrides[candidate]?.timestamp = timestamp
overrides[candidate]?.count += 1
overrides[candidate]?.forceHighScoreOverride = forceHighScoreOverride
} else {
overrides[candidate] = .init(count: 1, timestamp: timestamp)
}
@ -331,4 +205,192 @@ extension vChewing.LMUserOverride {
return
}
}
/// Result of querying the user override model for one observation key.
public struct Suggestion {
  /// Suggested entries, each a (reading, unigram) tuple.
  var candidates: [(String, Megrez.Unigram)] = []
  /// Override-mode flag copied from the override entry that produced the latest candidate.
  var forceHighScoreOverride: Bool = false
  /// `true` when the model produced no candidate at all.
  var isEmpty: Bool { candidates.isEmpty }
}
}
// MARK: - Array Extensions.
extension Array where Element == Megrez.Compositor.Node {
/// Sum of the `keyArray` element counts across all nodes in this array.
public var totalReadingsCount: Int {
  reduce(0) { partialSum, node in partialSum + node.keyArray.count }
}
/// Finds the node that covers the given cursor position in this node array.
///
/// Positions are measured in accumulated `spanLength` units, the same unit the lookup loop uses.
///
/// - Parameters:
///   - cursor: Position to look up. Values outside `0...totalSpan` are clamped into that range.
///   - outCursorPastNode: Receives the accumulated span at the end of the returned node.
///     It is left untouched when `nil` is returned.
/// - Returns: The covering node, or `nil` when the array is empty or no node covers the position.
public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
  guard !isEmpty else { return nil }
  // Measure the upper bound in the unit the loop below accumulates (span lengths).
  // Bounding by the number of entries would mis-clamp cursors whenever a node spans
  // more than one position.
  let totalSpan = reduce(0) { $0 + $1.spanLength }
  let cursor = Swift.max(0, Swift.min(cursor, totalSpan))
  if cursor == 0, let theFirst = first {
    outCursorPastNode = theFirst.spanLength
    return theFirst
  }
  // A cursor at (or right before) the very end always belongs to the last node.
  if cursor >= totalSpan - 1, let theLast = last {
    outCursorPastNode = totalSpan
    return theLast
  }
  var accumulated = 0
  for neta in self {
    accumulated += neta.spanLength
    if accumulated > cursor {
      outCursorPastNode = accumulated
      return neta
    }
  }
  // Only reachable when no node covers the position.
  return nil
}
}
// MARK: - Private Methods
extension vChewing.LMUserOverride {
/// Records one observed candidate choice under the given key and then invokes `saveCallback`.
///
/// - Parameters:
///   - key: Observation key the choice is filed under.
///   - candidate: The candidate value that was chosen.
///   - timestamp: Time of the observation, forwarded to `Observation.update`.
///   - forceHighScoreOverride: Flag forwarded to `Observation.update`.
///   - saveCallback: Called once after the in-memory LRU structures have been updated.
private func doObservation(
  key: String, candidate: String, timestamp: Double, forceHighScoreOverride: Bool,
  saveCallback: @escaping () -> Void
) {
  // Known key: update the stored observation and put the updated pair at the head of the list.
  if var theNeta = mutLRUMap[key] {
    theNeta.observation.update(
      candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride
    )
    // NOTE(review): the earlier list entry for this key is not removed here, so the list can
    // hold more than one pair with the same key — confirm whether that is intended.
    mutLRUList.insert(theNeta, at: 0)
    mutLRUMap[key] = theNeta
    IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)")
    saveCallback()
    return
  }
  // Unknown key: create a fresh observation and register it in both the map and the list.
  var observation: Observation = .init()
  observation.update(candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride)
  let koPair = KeyObservationPair(key: key, observation: observation)
  // Drop any value for the key before assigning, as the original code did.
  mutLRUMap.removeValue(forKey: key)
  mutLRUMap[key] = koPair
  mutLRUList.insert(koPair, at: 0)
  // Evict the entry at the tail once the capacity is exceeded. `endIndex` is one past the
  // final element, so subscripting with it traps; read the tail through `last` instead.
  if mutLRUList.count > mutCapacity, let oldest = mutLRUList.last {
    mutLRUMap.removeValue(forKey: oldest.key)
    mutLRUList.removeLast()
  }
  IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)")
  saveCallback()
}
/// Builds a suggestion from the observation stored under `key`.
///
/// Walks the stored overrides and appends a candidate each time an override's decayed score
/// beats the best score seen so far in this pass.
///
/// - Parameters:
///   - key: Observation key to look up. A blank or unknown key yields an empty suggestion.
///   - timestamp: Current time, used for score decay.
///   - headReading: Reading string paired with every returned unigram.
/// - Returns: The collected candidates plus the override flag of the last appended one.
private func getSuggestion(key: String, timestamp: Double, headReading: String) -> Suggestion {
  guard !key.isEmpty else { return .init() }
  guard let storedPair = mutLRUMap[key] else { return .init() }
  let record: Observation = storedPair.observation
  var collected: [(String, Megrez.Unigram)] = []
  var shouldForceHighScore = false
  var bestScore: Double = 0
  for (candidateValue, overrideEntry) in record.overrides {
    let decayedScore = getScore(
      eventCount: overrideEntry.count, totalCount: record.count,
      eventTimestamp: overrideEntry.timestamp, timestamp: timestamp, lambda: mutDecayExponent
    )
    // Skip fully decayed entries and anything not beating the running best.
    guard decayedScore != 0.0, decayedScore > bestScore else { continue }
    collected.append((headReading, Megrez.Unigram(value: candidateValue, score: decayedScore)))
    shouldForceHighScore = overrideEntry.forceHighScoreOverride
    bestScore = decayedScore
  }
  return Suggestion(candidates: collected, forceHighScoreOverride: shouldForceHighScore)
}
/// Computes the time-decayed score of one override entry.
///
/// - Parameters:
///   - eventCount: Number of times this particular override was observed.
///   - totalCount: Total number of observations recorded under the same key.
///   - eventTimestamp: Time at which the override was last observed.
///   - timestamp: Current time.
///   - lambda: Decay exponent multiplied with the elapsed time.
/// - Returns: `0.0` when the decay factor is below `kDecayThreshold`;
///   otherwise the event probability multiplied by the decay factor.
private func getScore(
  eventCount: Int,
  totalCount: Int,
  eventTimestamp: Double,
  timestamp: Double,
  lambda: Double
) -> Double {
  let elapsed = timestamp - eventTimestamp
  let decayFactor = exp(elapsed * lambda)
  let probability = Double(eventCount) / Double(totalCount)
  return decayFactor < kDecayThreshold ? 0.0 : probability * decayFactor
}
/// Tells whether the first non-empty key of the node starts with an underscore.
///
/// - Parameter node: The node whose `keyArray` is inspected.
/// - Returns: `true` if the first non-empty key begins with "_"; `false` otherwise,
///   including when every key is empty.
private static func isPunctuation(_ node: Megrez.Compositor.Node) -> Bool {
  // Only the first key that has any character decides the outcome.
  guard let leadingChar = node.keyArray.lazy.compactMap({ $0.first }).first else { return false }
  return leadingChar == "_"
}
/// Builds the observation key for the node reached at `headIndex` in a walked-node array.
///
/// The key has the shape "(anterior,previous,current)", where `current` is the reading key of the
/// head node and the other two slots come from the `toNGramKey` of up to two preceding nodes.
///
/// - Parameters:
///   - walkedNodes: The walked nodes to derive the key from.
///   - cursorIndex: Accumulated span length at which node collection stops (external label `headIndex`).
///   - readingOnly: When true, only the head node's reading key is returned instead of the full trigram key.
/// - Returns: The key, or an empty string when no usable key can be formed.
private static func formObservationKey(
walkedNodes: [Megrez.Compositor.Node], headIndex cursorIndex: Int, readingOnly: Bool = false
) -> String {
// Single-character values that may still form a key without a valid previous pair.
let whiteList = "你他妳她祢衪它牠再在"
var arrNodes: [Megrez.Compositor.Node] = []
var intLength = 0
// Collect nodes from the start until their accumulated span length reaches cursorIndex.
for theNodeAnchor in walkedNodes {
arrNodes.append(theNodeAnchor)
intLength += theNodeAnchor.spanLength
if intLength >= cursorIndex {
break
}
}
if arrNodes.isEmpty { return "" }
// Reverse so that index 0 is the head node and indices 1 and 2 are the nodes preceding it.
arrNodes = Array(arrNodes.reversed())
let kvCurrent = arrNodes[0].currentPair
// A head reading containing "_" never forms a key.
guard !kvCurrent.key.contains("_") else {
return ""
}
// The number of "-"-separated reading segments must equal the number of characters in the value.
if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" }
// Prepare the pieces of the key; the two neighbor pairs start out as empty placeholders.
let strCurrent = kvCurrent.key
var kvPrevious = Megrez.KeyValuePaired()
var kvAnterior = Megrez.KeyValuePaired()
var readingStack = ""
// Both computed locals below read kvPrevious / kvAnterior / readingStack lazily, so they see
// whatever the assignments further down have stored by the time `result` is returned.
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String {
// Reject the key when a collected neighbor reading contains "_", or when kvCurrent is a lone
// single-character value that has no valid previous pair and is not in the white list.
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
}
// NOTE(review): the two extra conditions below inspect kvPrevious while it is still the empty
// placeholder, not arrNodes[1].currentPair — presumably they were meant to validate the
// neighbor itself; confirm before changing, since doing so would alter the generated keys.
if arrNodes.count >= 2,
!kvPrevious.key.contains("_"),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{
kvPrevious = arrNodes[1].currentPair
readingStack = kvPrevious.key + readingStack
}
// NOTE(review): same pattern as above — kvAnterior is checked before it is assigned.
if arrNodes.count >= 3,
!kvAnterior.key.contains("_"),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{
kvAnterior = arrNodes[2].currentPair
readingStack = kvAnterior.key + readingStack
}
return result
}
}