UOM // Do not make unigram keys for candidates of single kanji.

This commit is contained in:
ShikiSuen 2022-06-23 08:45:09 +08:00
parent be7a5674e6
commit 8dad8283f6
1 changed file with 17 additions and 10 deletions

View File

@ -131,6 +131,7 @@ extension vChewing {
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
) -> String { ) -> String {
let arrEndingPunctuation = ["", "", "", "", "", "", "", ""] let arrEndingPunctuation = ["", "", "", "", "", "", "", ""]
let whiteList = "你他妳她祢她它牠再在"
var arrNodes: [Megrez.NodeAnchor] = [] var arrNodes: [Megrez.NodeAnchor] = []
var intLength = 0 var intLength = 0
for theNodeAnchor in walkedAnchors { for theNodeAnchor in walkedAnchors {
@ -156,30 +157,36 @@ extension vChewing {
// //
let strCurrent = kvCurrent.key let strCurrent = kvCurrent.key
var kvPrevious = Megrez.KeyValuePaired()
var strPrevious = "()" var kvAnterior = Megrez.KeyValuePaired()
var strAnterior = "()"
var readingStack = "" var readingStack = ""
var trigramKey: String { "(\(strAnterior),\(strPrevious),\(strCurrent))" } var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
var result: String { var result: String {
readingStack.contains("_") ? "" : (readingOnly ? strCurrent : trigramKey) // kvCurrent
if readingStack.contains("_")
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
{
return ""
} else {
return (readingOnly ? strCurrent : trigramKey)
}
} }
if arrNodes.count >= 2, if arrNodes.count >= 2,
let kvPrevious = arrNodes[1].node?.currentKeyValue, let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvPrevious.value), !arrEndingPunctuation.contains(kvPrevious.value),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{ {
strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))" kvPrevious = kvPreviousThisOne
readingStack = kvPrevious.key + readingStack readingStack = kvPrevious.key + readingStack
} }
if arrNodes.count >= 3, if arrNodes.count >= 3,
let kvAnterior = arrNodes[2].node?.currentKeyValue, let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvAnterior.value), !arrEndingPunctuation.contains(kvAnterior.value),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{ {
strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))" kvAnterior = kvAnteriorThisOne
readingStack = kvAnterior.key + readingStack readingStack = kvAnterior.key + readingStack
} }
@ -282,7 +289,7 @@ extension vChewing.LMUserOverride {
let decoder = JSONDecoder() let decoder = JSONDecoder()
do { do {
let data = try Data(contentsOf: fileURL, options: .mappedIfSafe) let data = try Data(contentsOf: fileURL, options: .mappedIfSafe)
guard let jsonResult = try? decoder.decode(Dictionary<String, KeyObservationPair>.self, from: data) else { guard let jsonResult = try? decoder.decode([String: KeyObservationPair].self, from: data) else {
IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.") IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.")
return return
} }