UOM // Implementing new function sets from upstream.

- Keeping previous methods for generating keys.
This commit is contained in:
ShikiSuen 2022-08-08 09:39:22 +08:00
parent 0cb8ffd649
commit e053273815
3 changed files with 269 additions and 204 deletions

View File

@ -126,31 +126,26 @@ public class KeyHandler {
return arrResult return arrResult
} }
/// ///
/// ///
/// - Parameters: /// - Parameters:
/// - value: /// - value:
/// - respectCursorPushing: true /// - respectCursorPushing: true
func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) { func fixNode(candidate: (String, String), respectCursorPushing: Bool = true) {
let actualCursor = actualCandidateCursor let actualCursor = actualCandidateCursor
let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1) let theCandidate: Megrez.Compositor.Candidate = .init(key: candidate.0, value: candidate.1)
if !compositor.overrideCandidate(theCandidate, at: actualCursor) { return } if !compositor.overrideCandidate(theCandidate, at: actualCursor, overrideType: .withHighScore) { return }
let previousWalk = compositor.walkedNodes
// //
walk() walk()
let currentWalk = compositor.walkedNodes
// 使 // 使
var accumulatedCursor = 0 var accumulatedCursor = 0
var currentNode: Megrez.Compositor.Node? let currentNode = currentWalk.findNode(at: actualCandidateCursor, target: &accumulatedCursor)
for node in compositor.walkedNodes {
accumulatedCursor += node.spanLength
if accumulatedCursor > actualCursor {
currentNode = node
break
}
}
guard let currentNode = currentNode else { return } guard let currentNode = currentNode else { return }
if currentNode.currentUnigram.score > -12 { if currentNode.currentUnigram.score > -12, mgrPrefs.fetchSuggestionsFromUserOverrideModel {
IME.prtDebugIntel("UOM: Start Observation.") IME.prtDebugIntel("UOM: Start Observation.")
// 使 // 使
// //
@ -158,9 +153,9 @@ public class KeyHandler {
mgrPrefs.failureFlagForUOMObservation = true mgrPrefs.failureFlagForUOMObservation = true
// //
// //
currentUOM.observe( currentUOM.performObservation(
walkedNodes: compositor.walkedNodes, cursorIndex: actualCursor, candidate: theCandidate.value, walkedBefore: previousWalk, walkedAfter: currentWalk, cursor: actualCandidateCursor,
timestamp: NSDate().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() } timestamp: Date().timeIntervalSince1970, saveCallback: { mgrLangModel.saveUserOverrideModelData() }
) )
// //
mgrPrefs.failureFlagForUOMObservation = false mgrPrefs.failureFlagForUOMObservation = false
@ -196,7 +191,7 @@ public class KeyHandler {
return arrCandidates.map { ($0.key, $0.value) } return arrCandidates.map { ($0.key, $0.value) }
} }
let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestedCandidates() let arrSuggestedUnigrams: [(String, Megrez.Unigram)] = fetchSuggestionsFromUOM(apply: false)
let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map { let arrSuggestedCandidates: [Megrez.Compositor.Candidate] = arrSuggestedUnigrams.map {
Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value) Megrez.Compositor.Candidate(key: $0.0, value: $0.1.value)
} }
@ -206,32 +201,40 @@ public class KeyHandler {
return arrCandidates.map { ($0.key, $0.value) } return arrCandidates.map { ($0.key, $0.value) }
} }
///
func fetchSuggestedCandidates() -> [(String, Megrez.Unigram)] {
currentUOM.suggest(
walkedNodes: compositor.walkedNodes, cursorIndex: compositor.cursor,
timestamp: NSDate().timeIntervalSince1970
).stableSort { $0.1.score > $1.1.score }
}
/// ///
func fetchAndApplySuggestionsFromUserOverrideModel() { @discardableResult func fetchSuggestionsFromUOM(apply: Bool) -> [(String, Megrez.Unigram)] {
var arrResult = [(String, Megrez.Unigram)]()
/// ///
if mgrPrefs.useSCPCTypingMode { return } if mgrPrefs.useSCPCTypingMode { return arrResult }
/// ///
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return } if !mgrPrefs.fetchSuggestionsFromUserOverrideModel { return arrResult }
/// ///
let overrideValue = fetchSuggestedCandidates().first?.1.value ?? "" let suggestion = currentUOM.fetchSuggestion(
currentWalk: compositor.walkedNodes, cursor: actualCandidateCursor, timestamp: Date().timeIntervalSince1970
/// )
if !overrideValue.isEmpty { arrResult.append(contentsOf: suggestion.candidates)
IME.prtDebugIntel( if apply {
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.") ///
// TODO: if !suggestion.isEmpty, let newestSuggestedCandidate = suggestion.candidates.last {
compositor.overrideCandidateLiteral(overrideValue, at: actualCandidateCursor, overrideType: .withTopUnigramScore) let overrideBehavior: Megrez.Compositor.Node.OverrideType =
} else { suggestion.forceHighScoreOverride ? .withHighScore : .withTopUnigramScore
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.") let suggestedPair: Megrez.Compositor.Candidate = .init(
key: newestSuggestedCandidate.0, value: newestSuggestedCandidate.1.value
)
IME.prtDebugIntel(
"UOM: Suggestion retrieved, overriding the node score of the selected candidate: \(suggestedPair.toNGramKey)")
if !compositor.overrideCandidate(suggestedPair, at: actualCandidateCursor, overrideType: overrideBehavior) {
compositor.overrideCandidateLiteral(
newestSuggestedCandidate.1.value, at: actualCandidateCursor, overrideType: overrideBehavior
)
}
walk()
} else {
IME.prtDebugIntel("UOM: Blank suggestion retrieved, dismissing.")
}
} }
arrResult = arrResult.stableSort { $0.1.score > $1.1.score }
return arrResult
} }
// MARK: - Extracted methods and functions (Tekkon). // MARK: - Extracted methods and functions (Tekkon).

View File

@ -87,7 +87,7 @@ extension KeyHandler {
walk() walk()
// //
fetchAndApplySuggestionsFromUserOverrideModel() fetchSuggestionsFromUOM(apply: true)
// //
composer.clear() composer.clear()

View File

@ -26,174 +26,46 @@ extension vChewing {
mutDecayExponent = log(0.5) / decayConstant mutDecayExponent = log(0.5) / decayConstant
} }
public func observe( public func performObservation(
walkedNodes: [Megrez.Compositor.Node], walkedBefore: [Megrez.Compositor.Node], walkedAfter: [Megrez.Compositor.Node],
cursorIndex: Int, cursor: Int, timestamp: Double, saveCallback: @escaping () -> Void
candidate: String,
timestamp: Double,
saveCallback: @escaping () -> Void
) { ) {
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) //
guard !walkedAfter.isEmpty, !walkedBefore.isEmpty else { return }
guard walkedBefore.totalReadingsCount == walkedAfter.totalReadingsCount else { return }
//
var actualCursor = 0
guard let currentNode = walkedAfter.findNode(at: cursor, target: &actualCursor) else { return }
// 使
guard currentNode.spanLength <= 3 else { return }
//
guard actualCursor > 0 else { return } //
let currentNodeIndex = actualCursor
actualCursor -= 1
var prevNodeIndex = 0
guard let prevNode = walkedBefore.findNode(at: actualCursor, target: &prevNodeIndex) else { return }
let forceHighScoreOverride: Bool = currentNode.spanLength > prevNode.spanLength
let breakingUp = currentNode.spanLength == 1 && prevNode.spanLength > 1
let targetNodeIndex = breakingUp ? currentNodeIndex : prevNodeIndex
let key: String = vChewing.LMUserOverride.formObservationKey(
walkedNodes: walkedAfter, headIndex: targetNodeIndex
)
guard !key.isEmpty else { return } guard !key.isEmpty else { return }
doObservation(
guard mutLRUMap[key] != nil else { key: key, candidate: currentNode.currentUnigram.value, timestamp: timestamp,
var observation: Observation = .init() forceHighScoreOverride: forceHighScoreOverride, saveCallback: { saveCallback() }
observation.update(candidate: candidate, timestamp: timestamp) )
let koPair = KeyObservationPair(key: key, observation: observation)
// key key key
// Swift
mutLRUMap.removeValue(forKey: key)
mutLRUMap[key] = koPair
mutLRUList.insert(koPair, at: 0)
if mutLRUList.count > mutCapacity {
mutLRUMap.removeValue(forKey: mutLRUList[mutLRUList.endIndex].key)
mutLRUList.removeLast()
}
IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)")
saveCallback()
return
}
//
if var theNeta = mutLRUMap[key] {
_ = suggest(
walkedNodes: walkedNodes, cursorIndex: cursorIndex, timestamp: timestamp,
decayCallback: {
theNeta.observation.update(candidate: candidate, timestamp: timestamp)
self.mutLRUList.insert(theNeta, at: 0)
self.mutLRUMap[key] = theNeta
IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)")
saveCallback()
}
)
}
} }
public func suggest( public func fetchSuggestion(
walkedNodes: [Megrez.Compositor.Node], currentWalk: [Megrez.Compositor.Node], cursor: Int, timestamp: Double
cursorIndex: Int, ) -> Suggestion {
timestamp: Double, var headIndex = 0
decayCallback: @escaping () -> Void = {} guard let nodeIter = currentWalk.findNode(at: cursor, target: &headIndex) else { return .init() }
) -> [(String, Megrez.Unigram)] { let key = vChewing.LMUserOverride.formObservationKey(walkedNodes: currentWalk, headIndex: headIndex)
let key = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex) return getSuggestion(key: key, timestamp: timestamp, headReading: nodeIter.key)
guard !key.isEmpty else {
IME.prtDebugIntel("UOM: Blank key generated on suggestion, aborting suggestion.")
return .init()
}
let currentReadingKey = convertKeyFrom(walkedNodes: walkedNodes, cursorIndex: cursorIndex, readingOnly: true)
guard let koPair = mutLRUMap[key] else {
IME.prtDebugIntel("UOM: mutLRUMap[key] is nil, throwing blank suggestion for key: \(key).")
return .init()
}
let observation = koPair.observation
var arrResults = [(String, Megrez.Unigram)]()
var currentHighScore = 0.0
for overrideNeta in Array(observation.overrides) {
let override: Override = overrideNeta.value
let overrideScore: Double = getScore(
eventCount: override.count,
totalCount: observation.count,
eventTimestamp: override.timestamp,
timestamp: timestamp,
lambda: mutDecayExponent
)
if (0...currentHighScore).contains(overrideScore) { continue }
let overrideDetectionScore: Double = getScore(
eventCount: override.count,
totalCount: observation.count,
eventTimestamp: override.timestamp,
timestamp: timestamp,
lambda: mutDecayExponent * 2
)
if (0...currentHighScore).contains(overrideDetectionScore) { decayCallback() }
let newUnigram = Megrez.Unigram(value: overrideNeta.key, score: overrideScore)
arrResults.insert((currentReadingKey, newUnigram), at: 0)
currentHighScore = overrideScore
}
if arrResults.isEmpty {
IME.prtDebugIntel("UOM: No usable suggestions in the result for key: \(key).")
}
return arrResults
}
private func getScore(
eventCount: Int,
totalCount: Int,
eventTimestamp: Double,
timestamp: Double,
lambda: Double
) -> Double {
let decay = exp((timestamp - eventTimestamp) * lambda)
if decay < kDecayThreshold { return 0.0 }
let prob = Double(eventCount) / Double(totalCount)
return prob * decay
}
func convertKeyFrom(
walkedNodes: [Megrez.Compositor.Node], cursorIndex: Int, readingOnly: Bool = false
) -> String {
let whiteList = "你他妳她祢衪它牠再在"
var arrNodes: [Megrez.Compositor.Node] = []
var intLength = 0
for theNodeAnchor in walkedNodes {
arrNodes.append(theNodeAnchor)
intLength += theNodeAnchor.spanLength
if intLength >= cursorIndex {
break
}
}
if arrNodes.isEmpty { return "" }
arrNodes = Array(arrNodes.reversed())
let kvCurrent = arrNodes[0].currentPair
guard !kvCurrent.key.contains("_") else {
return ""
}
//
if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" }
//
let strCurrent = kvCurrent.key
var kvPrevious = Megrez.KeyValuePaired()
var kvAnterior = Megrez.KeyValuePaired()
var readingStack = ""
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String {
// kvCurrent
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
}
if arrNodes.count >= 2,
!kvPrevious.key.contains("_"),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{
kvPrevious = arrNodes[1].currentPair
readingStack = kvPrevious.key + readingStack
}
if arrNodes.count >= 3,
!kvAnterior.key.contains("_"),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{
kvAnterior = arrNodes[2].currentPair
readingStack = kvAnterior.key + readingStack
}
return result
} }
} }
} }
@ -208,6 +80,7 @@ extension vChewing.LMUserOverride {
struct Override: Hashable, Encodable, Decodable { struct Override: Hashable, Encodable, Decodable {
var count: Int = 0 var count: Int = 0
var timestamp: Double = 0.0 var timestamp: Double = 0.0
var forceHighScoreOverride = false
static func == (lhs: Override, rhs: Override) -> Bool { static func == (lhs: Override, rhs: Override) -> Bool {
lhs.count == rhs.count && lhs.timestamp == rhs.timestamp lhs.count == rhs.count && lhs.timestamp == rhs.timestamp
} }
@ -242,11 +115,12 @@ extension vChewing.LMUserOverride {
hasher.combine(overrides) hasher.combine(overrides)
} }
mutating func update(candidate: String, timestamp: Double) { mutating func update(candidate: String, timestamp: Double, forceHighScoreOverride: Bool = false) {
count += 1 count += 1
if overrides.keys.contains(candidate) { if overrides.keys.contains(candidate) {
overrides[candidate]?.timestamp = timestamp overrides[candidate]?.timestamp = timestamp
overrides[candidate]?.count += 1 overrides[candidate]?.count += 1
overrides[candidate]?.forceHighScoreOverride = forceHighScoreOverride
} else { } else {
overrides[candidate] = .init(count: 1, timestamp: timestamp) overrides[candidate] = .init(count: 1, timestamp: timestamp)
} }
@ -331,4 +205,192 @@ extension vChewing.LMUserOverride {
return return
} }
} }
/// Result bundle returned by the suggestion lookup of the user override model.
public struct Suggestion {
  /// Suggested (reading, unigram) pairs; empty when nothing was suggested.
  var candidates: [(String, Megrez.Unigram)] = []
  /// Flag copied from the override record that produced the latest appended candidate.
  var forceHighScoreOverride: Bool = false
  /// `true` when no candidate is held.
  var isEmpty: Bool {
    return candidates.isEmpty
  }
}
}
// MARK: - Array Extensions.
extension Array where Element == Megrez.Compositor.Node {
  /// Sum of `keyArray.count` over every node in this array.
  public var totalReadingsCount: Int {
    reduce(0) { $0 + $1.keyArray.count }
  }

  /// Looks up the node covering a cursor position.
  ///
  /// The cursor is clamped into `0...keys.count` first (`keys` is declared
  /// elsewhere for this array type).
  /// - Parameters:
  ///   - cursor: Position to look up.
  ///   - outCursorPastNode: On success, receives the accumulated span length up to and
  ///     including the returned node (`keys.count` when the last node is returned by
  ///     the tail shortcut). Left untouched when `nil` is returned.
  /// - Returns: The matching node, or `nil` when the array is empty or no node matches.
  public func findNode(at cursor: Int, target outCursorPastNode: inout Int) -> Megrez.Compositor.Node? {
    guard let headNode = first, let tailNode = last else { return nil }
    let readingCount = keys.count
    let clampedCursor = Swift.max(0, Swift.min(cursor, readingCount))
    // A cursor at the very front always belongs to the first node.
    if clampedCursor == 0 {
      outCursorPastNode = headNode.spanLength
      return headNode
    }
    // Likewise, a cursor at (or right before) the rear end belongs to the last node.
    if clampedCursor >= readingCount - 1 {
      outCursorPastNode = readingCount
      return tailNode
    }
    var coveredLength = 0
    for candidateNode in self {
      coveredLength += candidateNode.spanLength
      guard coveredLength > clampedCursor else { continue }
      outCursorPastNode = coveredLength
      return candidateNode
    }
    // Only reached when the span lengths never exceed the cursor.
    return nil
  }
}
// MARK: - Private Methods
extension vChewing.LMUserOverride {
/// Records that `candidate` was chosen for `key`, keeping the LRU map and LRU list in sync.
///
/// - Parameters:
///   - key: Observation key; looked up in `mutLRUMap`.
///   - candidate: The candidate value being observed.
///   - timestamp: Time of the observation, forwarded to `Observation.update`.
///   - forceHighScoreOverride: Forwarded to `Observation.update`.
///   - saveCallback: Invoked once after the in-memory state has been updated.
private func doObservation(
  key: String, candidate: String, timestamp: Double, forceHighScoreOverride: Bool,
  saveCallback: @escaping () -> Void
) {
  if var existingPair = mutLRUMap[key] {
    existingPair.observation.update(
      candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride
    )
    // Move-to-front: drop the stale list entry for this key before re-inserting.
    // Leaving it behind would let a later eviction of that stale tail entry
    // remove the live map entry for the same key.
    mutLRUList.removeAll { $0.key == key }
    mutLRUList.insert(existingPair, at: 0)
    mutLRUMap[key] = existingPair
    IME.prtDebugIntel("UOM: Observation finished with existing observation: \(key)")
    saveCallback()
    return
  }

  // First observation for this key.
  var observation: Observation = .init()
  observation.update(candidate: candidate, timestamp: timestamp, forceHighScoreOverride: forceHighScoreOverride)
  let koPair = KeyObservationPair(key: key, observation: observation)
  mutLRUMap[key] = koPair
  mutLRUList.insert(koPair, at: 0)
  // Evict the least recently used entry once over capacity.
  // `popLast()` replaces the former `mutLRUList[mutLRUList.endIndex]` subscript,
  // which addressed one past the last element and trapped at runtime.
  if mutLRUList.count > mutCapacity, let evictedPair = mutLRUList.popLast() {
    mutLRUMap.removeValue(forKey: evictedPair.key)
  }
  IME.prtDebugIntel("UOM: Observation finished with new observation: \(key)")
  saveCallback()
}
/// Builds a suggestion from the observation stored under `key`.
///
/// Overrides are visited in dictionary order; an override is appended only when its
/// score is non-zero and strictly higher than every score accepted so far, so the
/// last appended candidate carries the highest score.
/// - Parameters:
///   - key: Observation key; an empty or unknown key yields an empty suggestion.
///   - timestamp: Current time, forwarded to `getScore`.
///   - headReading: Reading string paired with every suggested unigram.
/// - Returns: The accepted candidates plus the `forceHighScoreOverride` flag of the
///   last accepted override.
private func getSuggestion(key: String, timestamp: Double, headReading: String) -> Suggestion {
  if key.isEmpty { return .init() }
  guard let storedPair = mutLRUMap[key] else { return .init() }
  let observation: Observation = storedPair.observation
  var rankedCandidates = [(String, Megrez.Unigram)]()
  var shouldForceHighScore = false
  var bestScore: Double = 0
  for (candidateValue, overrideRecord) in observation.overrides {
    let currentScore = getScore(
      eventCount: overrideRecord.count, totalCount: observation.count,
      eventTimestamp: overrideRecord.timestamp, timestamp: timestamp, lambda: mutDecayExponent
    )
    // Skip decayed-out entries and anything not beating the best score so far.
    guard currentScore != 0.0, currentScore > bestScore else { continue }
    rankedCandidates.append((headReading, .init(value: candidateValue, score: currentScore)))
    shouldForceHighScore = overrideRecord.forceHighScoreOverride
    bestScore = currentScore
  }
  return .init(candidates: rankedCandidates, forceHighScoreOverride: shouldForceHighScore)
}
/// Scores one override as its relative frequency scaled by an exponential time decay.
///
/// - Parameters:
///   - eventCount: Number of times this override was observed.
///   - totalCount: Total observation count used as the denominator.
///   - eventTimestamp: Time at which this override was last observed.
///   - timestamp: Current time.
///   - lambda: Decay exponent applied to the elapsed time.
/// - Returns: `0.0` when the decay factor is below `kDecayThreshold`; otherwise
///   `eventCount / totalCount` multiplied by the decay factor.
private func getScore(
  eventCount: Int,
  totalCount: Int,
  eventTimestamp: Double,
  timestamp: Double,
  lambda: Double
) -> Double {
  let elapsed = timestamp - eventTimestamp
  let decayFactor = exp(elapsed * lambda)
  let frequency = Double(eventCount) / Double(totalCount)
  return decayFactor < kDecayThreshold ? 0.0 : frequency * decayFactor
}
/// Tells whether the first non-empty key of `node.keyArray` starts with an underscore
/// (presumably the prefix used for punctuation readings; confirm against the key scheme).
///
/// - Parameter node: The node whose keys are inspected.
/// - Returns: `false` when every key is empty or the node has no keys.
private static func isPunctuation(_ node: Megrez.Compositor.Node) -> Bool {
  guard let leadingKey = node.keyArray.first(where: { $0.first != nil }),
        let leadingCharacter = leadingKey.first
  else { return false }
  return String(leadingCharacter) == "_"
}
/// Builds the observation key for the node at the head position of a walk.
///
/// The key has the shape `(anterior,previous,currentReading)`, where the first two
/// slots are the `toNGramKey` of up to two nodes preceding the head node.
/// - Parameters:
///   - walkedNodes: The walk to read nodes from.
///   - cursorIndex: Nodes are collected from the front until their accumulated
///     `spanLength` reaches this value; the last collected node is the head node.
///   - readingOnly: When `true`, only the head node's reading key is returned
///     instead of the full trigram-style key.
/// - Returns: The key, or an empty string when no usable key can be formed.
private static func formObservationKey(
walkedNodes: [Megrez.Compositor.Node], headIndex cursorIndex: Int, readingOnly: Bool = false
) -> String {
// Single-character values listed here are still allowed to lead a key
// even when there is no valid previous node (see `result` below).
let whiteList = "你他妳她祢衪它牠再在"
var arrNodes: [Megrez.Compositor.Node] = []
var intLength = 0
// Collect nodes up to and including the one whose accumulated span reaches cursorIndex.
for theNodeAnchor in walkedNodes {
arrNodes.append(theNodeAnchor)
intLength += theNodeAnchor.spanLength
if intLength >= cursorIndex {
break
}
}
if arrNodes.isEmpty { return "" }

// Reverse so that index 0 is the head node, 1 the node before it, and so on.
arrNodes = Array(arrNodes.reversed())

let kvCurrent = arrNodes[0].currentPair
// Head readings containing an underscore never produce a key.
guard !kvCurrent.key.contains("_") else {
return ""
}

// Reject pairs whose number of "-"-separated readings differs from the character count of the value.
if kvCurrent.key.split(separator: "-").count != kvCurrent.value.count { return "" }

// Only the reading of the head node goes into the key; the two preceding slots use toNGramKey.
let strCurrent = kvCurrent.key
var kvPrevious = Megrez.KeyValuePaired()
var kvAnterior = Megrez.KeyValuePaired()
var readingStack = ""
// Both computed properties below are evaluated lazily at `return result`,
// i.e. after kvPrevious / kvAnterior / readingStack may have been updated.
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String {
// Yield no key when a preceding reading contains an underscore, or when a
// single-character, non-whitelisted kvCurrent has no valid previous pair.
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
}

// NOTE(review): the two conditions after the count check read kvPrevious while it
// still holds the default-constructed pair, not arrNodes[1].currentPair — confirm
// whether the neighbouring node was meant to be validated here.
if arrNodes.count >= 2,
!kvPrevious.key.contains("_"),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{
kvPrevious = arrNodes[1].currentPair
readingStack = kvPrevious.key + readingStack
}

// NOTE(review): same pattern as above — kvAnterior is inspected before it is assigned.
if arrNodes.count >= 3,
!kvAnterior.key.contains("_"),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{
kvAnterior = arrNodes[2].currentPair
readingStack = kvAnterior.key + readingStack
}

return result
}
} }