UOM // Do not make unigram keys for candidates of single kanji.

This commit is contained in:
ShikiSuen 2022-06-23 08:45:09 +08:00
parent be7a5674e6
commit 8dad8283f6
1 changed file with 17 additions and 10 deletions

View File

@ -131,6 +131,7 @@ extension vChewing {
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
) -> String {
let arrEndingPunctuation = ["", "", "", "", "", "", "", ""]
let whiteList = "你他妳她祢她它牠再在"
var arrNodes: [Megrez.NodeAnchor] = []
var intLength = 0
for theNodeAnchor in walkedAnchors {
@ -156,30 +157,36 @@ extension vChewing {
//
let strCurrent = kvCurrent.key
var strPrevious = "()"
var strAnterior = "()"
var kvPrevious = Megrez.KeyValuePaired()
var kvAnterior = Megrez.KeyValuePaired()
var readingStack = ""
var trigramKey: String { "(\(strAnterior),\(strPrevious),\(strCurrent))" }
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String {
readingStack.contains("_") ? "" : (readingOnly ? strCurrent : trigramKey)
// kvCurrent
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
}
if arrNodes.count >= 2,
let kvPrevious = arrNodes[1].node?.currentKeyValue,
let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvPrevious.value),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{
strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))"
kvPrevious = kvPreviousThisOne
readingStack = kvPrevious.key + readingStack
}
if arrNodes.count >= 3,
let kvAnterior = arrNodes[2].node?.currentKeyValue,
let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvAnterior.value),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{
strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))"
kvAnterior = kvAnteriorThisOne
readingStack = kvAnterior.key + readingStack
}
@ -282,7 +289,7 @@ extension vChewing.LMUserOverride {
let decoder = JSONDecoder()
do {
let data = try Data(contentsOf: fileURL, options: .mappedIfSafe)
guard let jsonResult = try? decoder.decode(Dictionary<String, KeyObservationPair>.self, from: data) else {
guard let jsonResult = try? decoder.decode([String: KeyObservationPair].self, from: data) else {
IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.")
return
}