vChewing-macOS/Source/Modules/LangModelRelated/LMInstantiator.swift

284 lines
13 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// Refactored from the ObjCpp-version of this class by:
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Foundation
extension vChewing {
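/// The language model instantiator (`LMInstantiator`, called "LMI" below).
/// It conforms to `LangModelProtocol` and gathers data from several
/// sub-models: the factory core and misc data, CNS and symbol data, user
/// phrases, user symbols, the filter list, the replacement table, associated
/// phrases, and the plain-Bopomofo table.
///
/// When asked for the unigrams of a key, LMI queries its sub-models in turn,
/// merges what they return into a single array, and hands that array back
/// to the caller.
///
/// While building that array, LMI applies the following steps:
///
/// 1. Collect the raw results from the sub-models.
/// 2. Drop every result whose value also appears in the filter list.
/// 3. If phrase replacement is enabled, substitute values through the replacement table.
/// 4. Drop duplicates: for each distinct value only the first occurrence is kept.
///
/// LMI does not store the locations of its data files; every loader receives
/// the file path as a parameter.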
public class LMInstantiator: LangModelProtocol {
// Feature switches consulted by the lookup methods of this class:
// `isPhraseReplacementEnabled` is read in `filterAndTransform`,
// `isCNSEnabled` and `isSymbolEnabled` are read in `unigramsFor(key:)`.
public var isPhraseReplacementEnabled = false
public var isCNSEnabled = false
public var isSymbolEnabled = false
/// Notes on the sub-model types used below.
/// ----------------------
/// NOTE(review): the original (non-English) explanation was lost when this
/// file was extracted. The surviving fragments indicate that:
/// - LMCoreEX stores, per key, an array of unigrams (`[Unigram]`);
/// - LMReplacements and LMAssociates are separate, dedicated types;
/// - LMCoreEX is mentioned together with 2010-2013 Mac models;
/// - LMCoreNS is mentioned together with plist data.
/// Confirm the details against the upstream repository before relying on them.
//
// Factory language models.
// NOTE(review): `reverse` presumably selects the column order of the source
// data — confirm against the LMCoreNS implementation.
var lmCore = LMCoreNS(
reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false
)
var lmMisc = LMCoreNS(
reverse: true, consolidate: false, defaultScore: -1.0, forceDefaultScore: false
)
// These two models are declared `static`, so a single instance is shared by
// every LMInstantiator.
// NOTE(review): the surviving comment fragment mentions "100MB" — presumably
// the memory cost of loading this data more than once; confirm.
static var lmCNS = vChewing.LMCoreNS(
reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false
)
static var lmSymbols = vChewing.LMCoreNS(
reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false
)
// User-provided language models (per instance).
// NOTE(review): a second comment line existed here whose text was lost in
// extraction; check the upstream source for its content.
var lmUserPhrases = LMCoreEX(
reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: false
)
var lmFiltered = LMCoreEX(
reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true
)
var lmUserSymbols = LMCoreEX(
reverse: true, consolidate: true, defaultScore: -12.0, forceDefaultScore: true
)
// Replacement table, associated-phrase table and plain-Bopomofo table.
var lmReplacements = LMReplacments()
var lmAssociates = LMAssociates()
var lmPlainBopomofo = LMPlainBopomofo()
// MARK: -
/// Whether the core language model (`lmCore`) reports itself as loaded.
public var isLanguageModelLoaded: Bool {
  return lmCore.isLoaded()
}

/// Opens the core language model from the given file.
///
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the core language model data file.
public func loadLanguageModel(path: String) {
  // Bail out early when the file cannot be read; nothing is opened in that case.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmCore: File access failure: \(path)")
    return
  }
  lmCore.open(path)
  IME.prtDebugIntel("lmCore: \(lmCore.count) entries of data loaded from: \(path)")
}
/// Whether the shared (static) CNS model reports itself as loaded.
public var isCNSDataLoaded: Bool {
  return vChewing.LMInstantiator.lmCNS.isLoaded()
}

/// Opens the shared (static) CNS model from the given file.
///
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the CNS data file.
public func loadCNSData(path: String) {
  // Bail out early when the file cannot be read; nothing is opened in that case.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmCNS: File access failure: \(path)")
    return
  }
  vChewing.LMInstantiator.lmCNS.open(path)
  IME.prtDebugIntel("lmCNS: \(vChewing.LMInstantiator.lmCNS.count) entries of data loaded from: \(path)")
}
/// Whether the misc model (`lmMisc`) reports itself as loaded.
public var isMiscDataLoaded: Bool {
  return lmMisc.isLoaded()
}

/// Opens the misc model from the given file.
///
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the misc data file.
public func loadMiscData(path: String) {
  // Bail out early when the file cannot be read; nothing is opened in that case.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmMisc: File access failure: \(path)")
    return
  }
  lmMisc.open(path)
  IME.prtDebugIntel("lmMisc: \(lmMisc.count) entries of data loaded from: \(path)")
}
/// Whether the shared (static) symbol model reports itself as loaded.
public var isSymbolDataLoaded: Bool {
  return vChewing.LMInstantiator.lmSymbols.isLoaded()
}

/// Opens the shared (static) symbol model from the given file.
///
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the symbol data file.
public func loadSymbolData(path: String) {
  // Bail out early when the file cannot be read; nothing is opened in that case.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmSymbols: File access failure: \(path)")
    return
  }
  vChewing.LMInstantiator.lmSymbols.open(path)
  IME.prtDebugIntel("lmSymbol: \(vChewing.LMInstantiator.lmSymbols.count) entries of data loaded from: \(path)")
}
/// Reloads the user phrase model and the filter (exclusion) model.
///
/// The two files are handled independently: a failure to read one of them
/// does not prevent the other from being reloaded. A model is only closed
/// and reopened when its file is readable; otherwise it is left untouched.
/// A debug message is emitted for each file in both the success and the
/// failure case.
/// - Parameters:
///   - path: Location of the user phrase data file.
///   - filterPath: Location of the filter list data file.
public func loadUserPhrasesData(path: String, filterPath: String) {
  if FileManager.default.isReadableFile(atPath: path) {
    lmUserPhrases.close()
    lmUserPhrases.open(path)
    IME.prtDebugIntel("lmUserPhrases: \(lmUserPhrases.count) entries of data loaded from: \(path)")
  } else {
    IME.prtDebugIntel("lmUserPhrases: File access failure: \(path)")
  }
  if FileManager.default.isReadableFile(atPath: filterPath) {
    lmFiltered.close()
    lmFiltered.open(filterPath)
    // Report the filter file's own path; the messages previously printed `path`,
    // which pointed diagnostics at the wrong file.
    IME.prtDebugIntel("lmFiltered: \(lmFiltered.count) entries of data loaded from: \(filterPath)")
  } else {
    IME.prtDebugIntel("lmFiltered: File access failure: \(filterPath)")
  }
}
/// Reloads the user symbol model from the given file.
///
/// The model is closed and reopened only when the file is readable.
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the user symbol data file.
public func loadUserSymbolData(path: String) {
  // Bail out early when the file cannot be read; the current data is kept.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmUserSymbol: File access failure: \(path)")
    return
  }
  lmUserSymbols.close()
  lmUserSymbols.open(path)
  IME.prtDebugIntel("lmUserSymbol: \(lmUserSymbols.count) entries of data loaded from: \(path)")
}
/// Reloads the associated-phrase model from the given file.
///
/// The model is closed and reopened only when the file is readable.
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the associated-phrase data file.
public func loadUserAssociatesData(path: String) {
  // Bail out early when the file cannot be read; the current data is kept.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmAssociates: File access failure: \(path)")
    return
  }
  lmAssociates.close()
  lmAssociates.open(path)
  IME.prtDebugIntel("lmAssociates: \(lmAssociates.count) entries of data loaded from: \(path)")
}
/// Reloads the phrase replacement table from the given file.
///
/// The table is closed and reopened only when the file is readable.
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the replacement table data file.
public func loadReplacementsData(path: String) {
  // Bail out early when the file cannot be read; the current data is kept.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmReplacements: File access failure: \(path)")
    return
  }
  lmReplacements.close()
  lmReplacements.open(path)
  IME.prtDebugIntel("lmReplacements: \(lmReplacements.count) entries of data loaded from: \(path)")
}
/// Reloads the plain-Bopomofo table (`lmPlainBopomofo`) from the given file.
///
/// The table is closed and reopened only when the file is readable.
/// A debug message is emitted in both the success and the failure case.
/// - Parameter path: Location of the plain-Bopomofo data file.
public func loadUserSCPCSequencesData(path: String) {
  // Bail out early when the file cannot be read; the current data is kept.
  guard FileManager.default.isReadableFile(atPath: path) else {
    IME.prtDebugIntel("lmPlainBopomofo: File access failure: \(path)")
    return
  }
  lmPlainBopomofo.close()
  lmPlainBopomofo.open(path)
  IME.prtDebugIntel("lmPlainBopomofo: \(lmPlainBopomofo.count) entries of data loaded from: \(path)")
}
// MARK: -
// Bigram lookup is currently disabled; its signature is left commented out below.
// public func bigramsFor(preceedingKey: String, key: String) -> [Megrez.Bigram] { }
/// Returns the processed unigrams that LMI can offer for the given key.
///
/// Results from the sub-models are concatenated in a fixed order and then
/// passed through `filterAndTransform(unigrams:filter:)`, which removes
/// filtered values, applies phrase replacement and drops duplicates.
/// - Parameter key: The key to look up.
/// - Returns: The processed unigram array for that key.
public func unigramsFor(key: String) -> [Megrez.Unigram] {
  // A key consisting of a single space yields a single space-valued unigram.
  guard key != " " else { return [.init(value: " ")] }

  // Accumulates every raw candidate before filtering.
  var collected: [Megrez.Unigram] = []

  // In SCPC typing mode the plain-Bopomofo table is consulted first; each of
  // its values becomes a unigram with score 0.
  if mgrPrefs.useSCPCTypingMode {
    for plainValue in lmPlainBopomofo.valuesFor(key: key) {
      collected.append(Megrez.Unigram(value: plainValue, score: 0))
    }
  }

  // User phrases are appended in reversed order. Duplicates are later resolved
  // in favour of the first occurrence (see `filterAndTransform`), so entries
  // returned later by `lmUserPhrases` take precedence.
  collected.append(contentsOf: lmUserPhrases.unigramsFor(key: key).reversed())

  // NOTE(review): the original comment indicates that lmMisc / lmCore scores
  // lie within (-10.0, 0.0) — confirm against the data files.
  collected.append(contentsOf: lmMisc.unigramsFor(key: key))
  collected.append(contentsOf: lmCore.unigramsFor(key: key))

  if isCNSEnabled {
    collected.append(contentsOf: vChewing.LMInstantiator.lmCNS.unigramsFor(key: key))
  }

  if isSymbolEnabled {
    collected.append(contentsOf: lmUserSymbols.unigramsFor(key: key))
    collected.append(contentsOf: vChewing.LMInstantiator.lmSymbols.unigramsFor(key: key))
  }

  // Append whatever `queryDateTimeUnigrams(with:)` produces for this key.
  collected += queryDateTimeUnigrams(with: key)

  // Values listed in the filter model for this key must not be offered.
  let blockedValues = Set(lmFiltered.unigramsFor(key: key).map { $0.value })

  return filterAndTransform(unigrams: collected, filter: blockedValues)
}
/// Tells whether LMI has any unigram for the given key.
///
/// NOTE(review): when the filter model has no entry for the key, only
/// `lmUserPhrases` and `lmCore` are consulted; the other sources that
/// `unigramsFor(key:)` reads (misc, CNS, symbols, plain Bopomofo, date/time)
/// are not considered on that path — confirm this is intentional.
/// - Parameter key: The key to look up.
/// - Returns: `true` when at least one unigram is available for the key.
public func hasUnigramsFor(key: String) -> Bool {
  // A single space is always treated as available.
  guard key != " " else { return true }

  // Without filter entries for this key a cheap existence check is enough.
  guard lmFiltered.hasUnigramsFor(key: key) else {
    return lmUserPhrases.hasUnigramsFor(key: key) || lmCore.hasUnigramsFor(key: key)
  }

  // Otherwise run the full lookup so that filtered values are discounted.
  let survivors = unigramsFor(key: key)
  return !survivors.isEmpty
}
/// Returns the associated phrases that `lmAssociates` holds for the given pair.
/// - Parameter pair: The key-value pair to look up.
/// - Returns: The values reported by `lmAssociates` for that pair.
public func associatedPhrasesFor(pair: Megrez.Compositor.KeyValuePaired) -> [String] {
  return lmAssociates.valuesFor(pair: pair)
}

/// Tells whether `lmAssociates` holds any associated phrase for the given pair.
/// - Parameter pair: The key-value pair to look up.
/// - Returns: The answer reported by `lmAssociates` for that pair.
public func hasAssociatedPhrasesFor(pair: Megrez.Compositor.KeyValuePaired) -> Bool {
  return lmAssociates.hasValuesFor(pair: pair)
}
// MARK: -
/// Filters, rewrites and de-duplicates a raw unigram array.
///
/// For every unigram, in input order:
/// 1. it is skipped when its value is contained in `filteredPairs`;
/// 2. when phrase replacement is enabled and the replacement table returns a
///    non-empty string for the value, the value is replaced by that string;
/// 3. it is skipped when the (possibly replaced) value was already emitted.
///
/// The score of each surviving unigram is carried over unchanged.
/// - Parameters:
///   - unigrams: The raw unigrams to process.
///   - filteredPairs: Values that must be removed from the result.
/// - Returns: The processed unigrams, in their original relative order.
func filterAndTransform(
  unigrams: [Megrez.Unigram],
  filter filteredPairs: Set<String>
) -> [Megrez.Unigram] {
  // Values that have already been placed into `output`.
  var emittedValues: Set<String> = []
  var output: [Megrez.Unigram] = []

  // The filter is checked against the value as it was before any replacement.
  for candidate in unigrams where !filteredPairs.contains(candidate.value) {
    var finalValue = candidate.value
    if isPhraseReplacementEnabled {
      let substitute = lmReplacements.valuesFor(key: finalValue)
      if !substitute.isEmpty { finalValue = substitute }
    }
    // `insert` reports whether the value is new; only the first occurrence is kept.
    guard emittedValues.insert(finalValue).inserted else { continue }
    output.append(Megrez.Unigram(value: finalValue, score: candidate.score))
  }
  return output
}
}
}