UOM // Do not make unigram keys for candidates of single kanji.
This commit is contained in:
parent
be7a5674e6
commit
8dad8283f6
|
@ -131,6 +131,7 @@ extension vChewing {
|
||||||
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
|
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
|
||||||
) -> String {
|
) -> String {
|
||||||
let arrEndingPunctuation = [",", "。", "!", "?", "」", "』", "”", "’"]
|
let arrEndingPunctuation = [",", "。", "!", "?", "」", "』", "”", "’"]
|
||||||
|
let whiteList = "你他妳她祢她它牠再在"
|
||||||
var arrNodes: [Megrez.NodeAnchor] = []
|
var arrNodes: [Megrez.NodeAnchor] = []
|
||||||
var intLength = 0
|
var intLength = 0
|
||||||
for theNodeAnchor in walkedAnchors {
|
for theNodeAnchor in walkedAnchors {
|
||||||
|
@ -156,30 +157,36 @@ extension vChewing {
|
||||||
|
|
||||||
// 前置單元只記錄讀音,在其後的單元則同時記錄讀音與字詞
|
// 前置單元只記錄讀音,在其後的單元則同時記錄讀音與字詞
|
||||||
let strCurrent = kvCurrent.key
|
let strCurrent = kvCurrent.key
|
||||||
|
var kvPrevious = Megrez.KeyValuePaired()
|
||||||
var strPrevious = "()"
|
var kvAnterior = Megrez.KeyValuePaired()
|
||||||
var strAnterior = "()"
|
|
||||||
var readingStack = ""
|
var readingStack = ""
|
||||||
var trigramKey: String { "(\(strAnterior),\(strPrevious),\(strCurrent))" }
|
var trigramKey: String { "(\(kvAnterior.toNGramKey),\(kvPrevious.toNGramKey),\(strCurrent))" }
|
||||||
var result: String {
|
var result: String {
|
||||||
readingStack.contains("_") ? "" : (readingOnly ? strCurrent : trigramKey)
|
// 不要把單個漢字的 kvCurrent 當前鍵值領頭的單元圖記入資料庫,不然對敲字體驗破壞太大。
|
||||||
|
if readingStack.contains("_")
|
||||||
|
|| (!kvPrevious.isValid && kvCurrent.value.count == 1 && !whiteList.contains(kvCurrent.value))
|
||||||
|
{
|
||||||
|
return ""
|
||||||
|
} else {
|
||||||
|
return (readingOnly ? strCurrent : trigramKey)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if arrNodes.count >= 2,
|
if arrNodes.count >= 2,
|
||||||
let kvPrevious = arrNodes[1].node?.currentKeyValue,
|
let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue,
|
||||||
!arrEndingPunctuation.contains(kvPrevious.value),
|
!arrEndingPunctuation.contains(kvPrevious.value),
|
||||||
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
||||||
{
|
{
|
||||||
strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))"
|
kvPrevious = kvPreviousThisOne
|
||||||
readingStack = kvPrevious.key + readingStack
|
readingStack = kvPrevious.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
if arrNodes.count >= 3,
|
if arrNodes.count >= 3,
|
||||||
let kvAnterior = arrNodes[2].node?.currentKeyValue,
|
let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue,
|
||||||
!arrEndingPunctuation.contains(kvAnterior.value),
|
!arrEndingPunctuation.contains(kvAnterior.value),
|
||||||
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
||||||
{
|
{
|
||||||
strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))"
|
kvAnterior = kvAnteriorThisOne
|
||||||
readingStack = kvAnterior.key + readingStack
|
readingStack = kvAnterior.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,7 +289,7 @@ extension vChewing.LMUserOverride {
|
||||||
let decoder = JSONDecoder()
|
let decoder = JSONDecoder()
|
||||||
do {
|
do {
|
||||||
let data = try Data(contentsOf: fileURL, options: .mappedIfSafe)
|
let data = try Data(contentsOf: fileURL, options: .mappedIfSafe)
|
||||||
guard let jsonResult = try? decoder.decode(Dictionary<String, KeyObservationPair>.self, from: data) else {
|
guard let jsonResult = try? decoder.decode([String: KeyObservationPair].self, from: data) else {
|
||||||
IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.")
|
IME.prtDebugIntel("UOM Error: Read file content type invalid, abort loading.")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue