LMInstantiator // Simplifying result-generation process.

This commit is contained in:
ShikiSuen 2022-05-02 10:53:16 +08:00
parent cb8bb2a7bb
commit ca970e46c9
1 changed files with 38 additions and 80 deletions

View File

@ -45,13 +45,12 @@ extension vChewing {
/// 1) Get the original unigrams. /// 1) Get the original unigrams.
/// 2) Drop the unigrams whose value is contained in the exclusion map. /// 2) Drop the unigrams whose value is contained in the exclusion map.
/// 3) Replace the values of the unigrams using the phrase replacement map. /// 3) Replace the values of the unigrams using the phrase replacement map.
/// 4) Replace the values of the unigrams using an external converter lambda. /// 4) Drop the duplicated phrases from the generated unigram array.
/// 5) Drop the duplicated phrases.
/// ///
/// The controller can ask the model to load the primary input method language /// The controller can ask the model to load the primary input method language
/// model while launching and to load the user phrases anytime if the custom /// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep the reference of the data paths but /// files are modified. It does not keep the reference of the data paths but
/// you have to pass the paths when you ask it to do loading. /// you have to pass the paths when you ask it to load.
public class LMInstantiator: Megrez.LanguageModel { public class LMInstantiator: Megrez.LanguageModel {
// //
public var isPhraseReplacementEnabled = false public var isPhraseReplacementEnabled = false
@ -63,13 +62,13 @@ extension vChewing {
/// Reverse /// Reverse
let lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false) let lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false)
let lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false) let lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false)
let lmSymbols = LMLite(defaultScore: -13.0, consolidate: true) let lmSymbols = LMLite(consolidate: true)
let lmCNS = LMLite(defaultScore: -11.0, consolidate: true) let lmCNS = LMLite(consolidate: true)
// 使 // 使
let lmUserPhrases = LMLite(defaultScore: 0.0, consolidate: true) let lmUserPhrases = LMLite(consolidate: true)
let lmFiltered = LMLite(defaultScore: 0.0, consolidate: true) let lmFiltered = LMLite(consolidate: true)
let lmUserSymbols = LMLite(defaultScore: -12.0, consolidate: true) let lmUserSymbols = LMLite(consolidate: true)
let lmReplacements = LMReplacments() let lmReplacements = LMReplacments()
let lmAssociates = LMAssociates() let lmAssociates = LMAssociates()
@ -173,87 +172,47 @@ extension vChewing {
return [spaceUnigram] return [spaceUnigram]
} }
/// ///
var coreUnigrams: [Megrez.Unigram] = [] var rawAllUnigrams: [Megrez.Unigram] = []
var miscUnigrams: [Megrez.Unigram] = []
var symbolUnigrams: [Megrez.Unigram] = []
var userUnigrams: [Megrez.Unigram] = []
var userSymbolUnigrams: [Megrez.Unigram] = []
var cnsUnigrams: [Megrez.Unigram] = []
var insertedPairs: Set<Megrez.KeyValuePair> = [] //
var filteredPairs: Set<Megrez.KeyValuePair> = []
//
let filteredUnigrams: [Megrez.Unigram] =
lmFiltered.hasUnigramsFor(key: key) ? lmFiltered.unigramsFor(key: key) : []
for unigram in filteredUnigrams {
filteredPairs.insert(unigram.keyValue)
}
if lmUserPhrases.hasUnigramsFor(key: key) {
var rawUserUnigrams: [Megrez.Unigram] = []
// reversed 使 // reversed 使
// //
// rawUserUnigrams // rawUserUnigrams
rawUserUnigrams.append(contentsOf: lmUserPhrases.unigramsFor(key: key).reversed()) rawAllUnigrams += lmUserPhrases.unigramsFor(key: key, score: 0.0).reversed()
userUnigrams = filterAndTransform(
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmUserPhrases.hasUnigramsFor(key: key) { // LMMisc LMCore score (-10.0, 0.0)
let rawUserUnigrams: [Megrez.Unigram] = lmUserPhrases.unigramsFor(key: key) rawAllUnigrams += lmMisc.unigramsFor(key: key)
userUnigrams = filterAndTransform( rawAllUnigrams += lmCore.unigramsFor(key: key)
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmMisc.hasUnigramsFor(key: key) { if isCNSEnabled {
let rawMiscUnigrams: [Megrez.Unigram] = lmMisc.unigramsFor(key: key) rawAllUnigrams += lmCNS.unigramsFor(key: key, score: -11)
miscUnigrams = filterAndTransform(
unigrams: rawMiscUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmCore.hasUnigramsFor(key: key) {
let rawCoreUnigrams: [Megrez.Unigram] = lmCore.unigramsFor(key: key)
coreUnigrams = filterAndTransform(
unigrams: rawCoreUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
} }
if isSymbolEnabled { if isSymbolEnabled {
if lmUserSymbols.hasUnigramsFor(key: key) { rawAllUnigrams += lmUserSymbols.unigramsFor(key: key, score: -12.0)
let rawUserSymbolUnigrams: [Megrez.Unigram] = lmUserSymbols.unigramsFor(key: key) if lmUserSymbols.unigramsFor(key: key).isEmpty {
userSymbolUnigrams = filterAndTransform(
unigrams: rawUserSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
} else {
IME.prtDebugIntel("Not found in UserSymbolUnigram: \(key)") IME.prtDebugIntel("Not found in UserSymbolUnigram: \(key)")
} }
if lmSymbols.hasUnigramsFor(key: key) { rawAllUnigrams += lmSymbols.unigramsFor(key: key, score: -11.0)
let rawSymbolUnigrams: [Megrez.Unigram] = lmSymbols.unigramsFor(key: key) if lmSymbols.unigramsFor(key: key).isEmpty {
symbolUnigrams = filterAndTransform(
unigrams: rawSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
} else {
IME.prtDebugIntel("Not found in UserUnigram: \(key)") IME.prtDebugIntel("Not found in UserUnigram: \(key)")
} }
} }
if lmCNS.hasUnigramsFor(key: key), isCNSEnabled { //
let rawCNSUnigrams: [Megrez.Unigram] = lmCNS.unigramsFor(key: key) var insertedPairs: Set<Megrez.KeyValuePair> = [] //
cnsUnigrams = filterAndTransform( var filteredPairs: Set<Megrez.KeyValuePair> = [] //
unigrams: rawCNSUnigrams, filter: filteredPairs, inserted: &insertedPairs
) // KeyValuePair
for unigram in lmFiltered.unigramsFor(key: key) {
filteredPairs.insert(unigram.keyValue)
} }
let allUnigrams: [Megrez.Unigram] = return filterAndTransform(
userUnigrams + miscUnigrams + coreUnigrams + cnsUnigrams + userSymbolUnigrams + symbolUnigrams unigrams: rawAllUnigrams,
filter: filteredPairs, inserted: &insertedPairs
return allUnigrams )
} }
/// If the model has unigrams for the given key. /// If the model has unigrams for the given key.
@ -286,16 +245,15 @@ extension vChewing {
var results: [Megrez.Unigram] = [] var results: [Megrez.Unigram] = []
for unigram in unigrams { for unigram in unigrams {
let pairToDealWith: Megrez.KeyValuePair = unigram.keyValue var pair: Megrez.KeyValuePair = unigram.keyValue
if filteredPairs.contains(pairToDealWith) { if filteredPairs.contains(pair) {
continue continue
} }
var pair: Megrez.KeyValuePair = pairToDealWith
if isPhraseReplacementEnabled { if isPhraseReplacementEnabled {
let replacement = lmReplacements.valuesFor(key: pair.key) let replacement = lmReplacements.valuesFor(key: pair.value)
if !replacement.isEmpty { if !replacement.isEmpty, pair.value.count == replacement.count {
IME.prtDebugIntel(replacement) IME.prtDebugIntel("\(pair.value) -> \(replacement)")
pair.value = replacement pair.value = replacement
} }
} }