LMInstantiator // Simplifying result-generation process.

This commit is contained in:
ShikiSuen 2022-05-02 10:53:16 +08:00
parent cb8bb2a7bb
commit ca970e46c9
1 changed files with 38 additions and 80 deletions

View File

@ -45,13 +45,12 @@ extension vChewing {
/// 1) Get the original unigrams.
/// 2) Drop the unigrams whose value is contained in the exclusion map.
/// 3) Replace the values of the unigrams using the phrase replacement map.
/// 4) Replace the values of the unigrams using an external converter lambda.
/// 5) Drop the duplicated phrases.
/// 4) Drop the duplicated phrases from the generated unigram array.
///
/// The controller can ask the model to load the primary input method language
/// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep the reference of the data paths but
/// you have to pass the paths when you ask it to do loading.
/// you have to pass the paths when you ask it to load.
public class LMInstantiator: Megrez.LanguageModel {
//
public var isPhraseReplacementEnabled = false
@ -63,13 +62,13 @@ extension vChewing {
/// Reverse
let lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false)
let lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false)
let lmSymbols = LMLite(defaultScore: -13.0, consolidate: true)
let lmCNS = LMLite(defaultScore: -11.0, consolidate: true)
let lmSymbols = LMLite(consolidate: true)
let lmCNS = LMLite(consolidate: true)
// 使
let lmUserPhrases = LMLite(defaultScore: 0.0, consolidate: true)
let lmFiltered = LMLite(defaultScore: 0.0, consolidate: true)
let lmUserSymbols = LMLite(defaultScore: -12.0, consolidate: true)
let lmUserPhrases = LMLite(consolidate: true)
let lmFiltered = LMLite(consolidate: true)
let lmUserSymbols = LMLite(consolidate: true)
let lmReplacements = LMReplacments()
let lmAssociates = LMAssociates()
@ -173,87 +172,47 @@ extension vChewing {
return [spaceUnigram]
}
///
var coreUnigrams: [Megrez.Unigram] = []
var miscUnigrams: [Megrez.Unigram] = []
var symbolUnigrams: [Megrez.Unigram] = []
var userUnigrams: [Megrez.Unigram] = []
var userSymbolUnigrams: [Megrez.Unigram] = []
var cnsUnigrams: [Megrez.Unigram] = []
///
var rawAllUnigrams: [Megrez.Unigram] = []
var insertedPairs: Set<Megrez.KeyValuePair> = [] //
var filteredPairs: Set<Megrez.KeyValuePair> = []
// reversed 使
//
// rawUserUnigrams
rawAllUnigrams += lmUserPhrases.unigramsFor(key: key, score: 0.0).reversed()
//
let filteredUnigrams: [Megrez.Unigram] =
lmFiltered.hasUnigramsFor(key: key) ? lmFiltered.unigramsFor(key: key) : []
for unigram in filteredUnigrams {
filteredPairs.insert(unigram.keyValue)
}
// LMMisc LMCore score (-10.0, 0.0)
rawAllUnigrams += lmMisc.unigramsFor(key: key)
rawAllUnigrams += lmCore.unigramsFor(key: key)
if lmUserPhrases.hasUnigramsFor(key: key) {
var rawUserUnigrams: [Megrez.Unigram] = []
// reversed 使
//
// rawUserUnigrams
rawUserUnigrams.append(contentsOf: lmUserPhrases.unigramsFor(key: key).reversed())
userUnigrams = filterAndTransform(
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmUserPhrases.hasUnigramsFor(key: key) {
let rawUserUnigrams: [Megrez.Unigram] = lmUserPhrases.unigramsFor(key: key)
userUnigrams = filterAndTransform(
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmMisc.hasUnigramsFor(key: key) {
let rawMiscUnigrams: [Megrez.Unigram] = lmMisc.unigramsFor(key: key)
miscUnigrams = filterAndTransform(
unigrams: rawMiscUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
}
if lmCore.hasUnigramsFor(key: key) {
let rawCoreUnigrams: [Megrez.Unigram] = lmCore.unigramsFor(key: key)
coreUnigrams = filterAndTransform(
unigrams: rawCoreUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
if isCNSEnabled {
rawAllUnigrams += lmCNS.unigramsFor(key: key, score: -11)
}
if isSymbolEnabled {
if lmUserSymbols.hasUnigramsFor(key: key) {
let rawUserSymbolUnigrams: [Megrez.Unigram] = lmUserSymbols.unigramsFor(key: key)
userSymbolUnigrams = filterAndTransform(
unigrams: rawUserSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
} else {
rawAllUnigrams += lmUserSymbols.unigramsFor(key: key, score: -12.0)
if lmUserSymbols.unigramsFor(key: key).isEmpty {
IME.prtDebugIntel("Not found in UserSymbolUnigram: \(key)")
}
if lmSymbols.hasUnigramsFor(key: key) {
let rawSymbolUnigrams: [Megrez.Unigram] = lmSymbols.unigramsFor(key: key)
symbolUnigrams = filterAndTransform(
unigrams: rawSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
} else {
rawAllUnigrams += lmSymbols.unigramsFor(key: key, score: -11.0)
if lmSymbols.unigramsFor(key: key).isEmpty {
IME.prtDebugIntel("Not found in UserUnigram: \(key)")
}
}
if lmCNS.hasUnigramsFor(key: key), isCNSEnabled {
let rawCNSUnigrams: [Megrez.Unigram] = lmCNS.unigramsFor(key: key)
cnsUnigrams = filterAndTransform(
unigrams: rawCNSUnigrams, filter: filteredPairs, inserted: &insertedPairs
)
//
var insertedPairs: Set<Megrez.KeyValuePair> = [] //
var filteredPairs: Set<Megrez.KeyValuePair> = [] //
// KeyValuePair
for unigram in lmFiltered.unigramsFor(key: key) {
filteredPairs.insert(unigram.keyValue)
}
let allUnigrams: [Megrez.Unigram] =
userUnigrams + miscUnigrams + coreUnigrams + cnsUnigrams + userSymbolUnigrams + symbolUnigrams
return allUnigrams
return filterAndTransform(
unigrams: rawAllUnigrams,
filter: filteredPairs, inserted: &insertedPairs
)
}
/// If the model has unigrams for the given key.
@ -286,16 +245,15 @@ extension vChewing {
var results: [Megrez.Unigram] = []
for unigram in unigrams {
let pairToDealWith: Megrez.KeyValuePair = unigram.keyValue
if filteredPairs.contains(pairToDealWith) {
var pair: Megrez.KeyValuePair = unigram.keyValue
if filteredPairs.contains(pair) {
continue
}
var pair: Megrez.KeyValuePair = pairToDealWith
if isPhraseReplacementEnabled {
let replacement = lmReplacements.valuesFor(key: pair.key)
if !replacement.isEmpty {
IME.prtDebugIntel(replacement)
let replacement = lmReplacements.valuesFor(key: pair.value)
if !replacement.isEmpty, pair.value.count == replacement.count {
IME.prtDebugIntel("\(pair.value) -> \(replacement)")
pair.value = replacement
}
}