LM // Swiftify: LMInstantiator.
This commit is contained in:
parent
1b4b4149a0
commit
887907fb11
|
@ -0,0 +1,311 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// NOTE: We still keep some of the comments left by Zonble,
|
||||
// regardless that he is not in charge of this Swift module。
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
|
||||
/// LMInstantiator is a facade for managing a set of models including
|
||||
/// the input method language model, user phrases and excluded phrases.
|
||||
///
|
||||
/// It is the primary model class that the input controller and grammar builder
|
||||
/// of vChewing talks to. When the grammar builder starts to build a sentence
|
||||
/// from a series of BPMF readings, it passes the readings to the model to see
|
||||
/// if there are valid unigrams, and use returned unigrams to produce the final
|
||||
/// results.
|
||||
///
|
||||
/// LMInstantiator combine and transform the unigrams from the primary language
|
||||
/// model and user phrases. The process is
|
||||
///
|
||||
/// 1) Get the original unigrams.
|
||||
/// 2) Drop the unigrams whose value is contained in the exclusion map.
|
||||
/// 3) Replace the values of the unigrams using the phrase replacement map.
|
||||
/// 4) Replace the values of the unigrams using an external converter lambda.
|
||||
/// 5) Drop the duplicated phrases.
|
||||
///
|
||||
/// The controller can ask the model to load the primary input method language
|
||||
/// model while launching and to load the user phrases anytime if the custom
|
||||
/// files are modified. It does not keep the reference of the data pathes but
|
||||
/// you have to pass the paths when you ask it to do loading.
|
||||
public class LMInstantiator: Megrez.LanguageModel {
|
||||
// 在函數內部用以記錄狀態的開關。
|
||||
public var isPhraseReplacementEnabled = false
|
||||
public var isCNSEnabled = false
|
||||
public var isSymbolEnabled = false
|
||||
|
||||
// 聲明原廠語言模組
|
||||
/// Reverse 的話,第一欄是注音,第二欄是對應的漢字,第三欄是可能的權重。
|
||||
/// 不 Reverse 的話,第一欄是漢字,第二欄是對應的注音,第三欄是可能的權重。
|
||||
let lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false)
|
||||
let lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false)
|
||||
let lmSymbols = LMLite(defaultScore: -13.0, consolidate: true)
|
||||
let lmCNS = LMLite(defaultScore: -11.0, consolidate: true)
|
||||
|
||||
// 聲明使用者語言模組
|
||||
let lmUserPhrases = LMLite(defaultScore: 0.0, consolidate: true)
|
||||
let lmFiltered = LMLite(defaultScore: 0.0, consolidate: true)
|
||||
let lmUserSymbols = LMLite(defaultScore: -12.0, consolidate: true)
|
||||
let lmReplacements = LMReplacments()
|
||||
let lmAssociates = LMAssociates()
|
||||
|
||||
// 初期化的函數先保留
|
||||
override init() {}
|
||||
|
||||
// 自我析構前要關掉全部的語言模組
|
||||
deinit {
|
||||
lmCore.close()
|
||||
lmMisc.close()
|
||||
lmSymbols.close()
|
||||
lmCNS.close()
|
||||
lmUserPhrases.close()
|
||||
lmFiltered.close()
|
||||
lmUserSymbols.close()
|
||||
lmReplacements.close()
|
||||
lmAssociates.close()
|
||||
}
|
||||
|
||||
// 以下這些函數命名暫時保持原樣,等弒神行動徹底結束了再調整。
|
||||
|
||||
public func isDataModelLoaded() -> Bool { lmCore.isLoaded() }
|
||||
public func loadLanguageModel(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmCore.close()
|
||||
lmCore.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func isCNSDataLoaded() -> Bool { lmCNS.isLoaded() }
|
||||
public func loadCNSData(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmCNS.close()
|
||||
lmCNS.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func isMiscDataLoaded() -> Bool { lmMisc.isLoaded() }
|
||||
public func loadMiscData(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmMisc.close()
|
||||
lmMisc.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func isSymbolDataLoaded() -> Bool { lmSymbols.isLoaded() }
|
||||
public func loadSymbolData(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmSymbols.close()
|
||||
lmSymbols.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func loadUserPhrases(path: String, filterPath: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmUserPhrases.close()
|
||||
lmUserPhrases.open(path)
|
||||
}
|
||||
if FileManager.default.isReadableFile(atPath: filterPath) {
|
||||
lmFiltered.close()
|
||||
lmFiltered.open(filterPath)
|
||||
}
|
||||
}
|
||||
|
||||
public func loadUserSymbolData(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmUserSymbols.close()
|
||||
lmUserSymbols.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func loadUserAssociatedPhrases(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmAssociates.close()
|
||||
lmAssociates.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
public func loadPhraseReplacementMap(path: String) {
|
||||
if FileManager.default.isReadableFile(atPath: path) {
|
||||
lmReplacements.close()
|
||||
lmReplacements.open(path)
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Core Functions (Public)
|
||||
|
||||
/// Not implemented since we do not have data to provide bigram function.
|
||||
// public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { }
|
||||
|
||||
/// Returns a list of available unigram for the given key.
|
||||
/// @param key:String represents the BPMF reading or a symbol key.
|
||||
/// For instance, it you pass "ㄉㄨㄟˇ", it returns "㨃" and other possible candidates.
|
||||
override open func unigramsFor(key: String) -> [Megrez.Unigram] {
|
||||
if key == " " {
|
||||
/// 給空格鍵指定輸出值。
|
||||
let spaceUnigram = Megrez.Unigram(
|
||||
keyValue: Megrez.KeyValuePair(key: " ", value: " "),
|
||||
score: 0
|
||||
)
|
||||
return [spaceUnigram]
|
||||
}
|
||||
|
||||
/// 準備不同的語言模組容器。
|
||||
var coreUnigrams: [Megrez.Unigram] = []
|
||||
var miscUnigrams: [Megrez.Unigram] = []
|
||||
var symbolUnigrams: [Megrez.Unigram] = []
|
||||
var userUnigrams: [Megrez.Unigram] = []
|
||||
var userSymbolUnigrams: [Megrez.Unigram] = []
|
||||
var cnsUnigrams: [Megrez.Unigram] = []
|
||||
|
||||
var insertedPairs: Set<Megrez.KeyValuePair> = [] // 具體用途有待商榷
|
||||
var filteredPairs: Set<Megrez.KeyValuePair> = []
|
||||
|
||||
// 開始逐漸往容器陣列內塞入資料
|
||||
let filteredUnigrams: [Megrez.Unigram] =
|
||||
lmFiltered.hasUnigramsFor(key: key) ? lmFiltered.unigramsFor(key: key) : []
|
||||
for unigram in filteredUnigrams {
|
||||
filteredPairs.insert(unigram.keyValue)
|
||||
}
|
||||
|
||||
if lmUserPhrases.hasUnigramsFor(key: key) {
|
||||
var rawUserUnigrams: [Megrez.Unigram] = []
|
||||
// 用 reversed 指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
|
||||
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
|
||||
// 將兩句差分也是為了讓 rawUserUnigrams 的類型不受可能的影響。
|
||||
rawUserUnigrams.append(contentsOf: lmUserPhrases.unigramsFor(key: key).reversed())
|
||||
userUnigrams = filterAndTransform(
|
||||
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
}
|
||||
|
||||
if lmUserPhrases.hasUnigramsFor(key: key) {
|
||||
let rawUserUnigrams: [Megrez.Unigram] = lmUserPhrases.unigramsFor(key: key)
|
||||
userUnigrams = filterAndTransform(
|
||||
unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
}
|
||||
|
||||
if lmMisc.hasUnigramsFor(key: key) {
|
||||
let rawMiscUnigrams: [Megrez.Unigram] = lmMisc.unigramsFor(key: key)
|
||||
miscUnigrams = filterAndTransform(
|
||||
unigrams: rawMiscUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
}
|
||||
|
||||
if lmCore.hasUnigramsFor(key: key) {
|
||||
let rawCoreUnigrams: [Megrez.Unigram] = lmCore.unigramsFor(key: key)
|
||||
coreUnigrams = filterAndTransform(
|
||||
unigrams: rawCoreUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
}
|
||||
|
||||
if isSymbolEnabled {
|
||||
if lmUserSymbols.hasUnigramsFor(key: key) {
|
||||
let rawUserSymbolUnigrams: [Megrez.Unigram] = lmUserSymbols.unigramsFor(key: key)
|
||||
userSymbolUnigrams = filterAndTransform(
|
||||
unigrams: rawUserSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
} else {
|
||||
IME.prtDebugIntel("Not found in UserSymbolUnigram: \(key)")
|
||||
}
|
||||
|
||||
if lmSymbols.hasUnigramsFor(key: key) {
|
||||
let rawSymbolUnigrams: [Megrez.Unigram] = lmSymbols.unigramsFor(key: key)
|
||||
symbolUnigrams = filterAndTransform(
|
||||
unigrams: rawSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
} else {
|
||||
IME.prtDebugIntel("Not found in UserUnigram: \(key)")
|
||||
}
|
||||
}
|
||||
|
||||
if lmCNS.hasUnigramsFor(key: key), isCNSEnabled {
|
||||
let rawCNSUnigrams: [Megrez.Unigram] = lmCNS.unigramsFor(key: key)
|
||||
cnsUnigrams = filterAndTransform(
|
||||
unigrams: rawCNSUnigrams, filter: filteredPairs, inserted: &insertedPairs
|
||||
)
|
||||
}
|
||||
|
||||
let allUnigrams: [Megrez.Unigram] =
|
||||
userUnigrams + miscUnigrams + coreUnigrams + cnsUnigrams + userSymbolUnigrams + symbolUnigrams
|
||||
|
||||
return allUnigrams
|
||||
}
|
||||
|
||||
/// If the model has unigrams for the given key.
|
||||
/// @param key The key.
|
||||
override open func hasUnigramsFor(key: String) -> Bool {
|
||||
if key == " " { return true }
|
||||
|
||||
if !lmFiltered.hasUnigramsFor(key: key) {
|
||||
return lmUserPhrases.hasUnigramsFor(key: key) || lmCore.hasUnigramsFor(key: key)
|
||||
}
|
||||
|
||||
return !unigramsFor(key: key).isEmpty
|
||||
}
|
||||
|
||||
public func associatedPhrasesForKey(_ key: String) -> [String] {
|
||||
lmAssociates.valuesFor(key: key) ?? []
|
||||
}
|
||||
|
||||
public func hasAssociatedPhrasesForKey(_ key: String) -> Bool {
|
||||
lmAssociates.hasValuesFor(key: key)
|
||||
}
|
||||
|
||||
// MARK: - Core Functions (Private)
|
||||
|
||||
func filterAndTransform(
|
||||
unigrams: [Megrez.Unigram],
|
||||
filter filteredPairs: Set<Megrez.KeyValuePair>,
|
||||
inserted insertedPairs: inout Set<Megrez.KeyValuePair>
|
||||
) -> [Megrez.Unigram] {
|
||||
var results: [Megrez.Unigram] = []
|
||||
|
||||
for unigram in unigrams {
|
||||
let pairToDealWith: Megrez.KeyValuePair = unigram.keyValue
|
||||
if filteredPairs.contains(pairToDealWith) {
|
||||
continue
|
||||
}
|
||||
|
||||
var pair: Megrez.KeyValuePair = pairToDealWith
|
||||
if isPhraseReplacementEnabled {
|
||||
let replacement = lmReplacements.valuesFor(key: pair.key)
|
||||
if !replacement.isEmpty {
|
||||
IME.prtDebugIntel(replacement)
|
||||
pair.value = replacement
|
||||
}
|
||||
}
|
||||
|
||||
if !insertedPairs.contains(pair) {
|
||||
results.append(Megrez.Unigram(keyValue: pair, score: unigram.score))
|
||||
insertedPairs.insert(pair)
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue