LMAssembly // Integrate EtenDOS SCPC data into the codebase.

This commit is contained in:
ShikiSuen 2024-02-23 12:27:23 +08:00
parent e44843e603
commit 3ebb5f2f48
11 changed files with 1400 additions and 93 deletions

View File

@ -26,10 +26,6 @@ let package = Package(
.product(name: "Megrez", package: "vChewing_Megrez"),
.product(name: "Shared", package: "vChewing_Shared"),
.product(name: "PinyinPhonaConverter", package: "vChewing_PinyinPhonaConverter"),
],
resources: [
.process("Resources/sequenceDataFromEtenDOS-chs.json"),
.process("Resources/sequenceDataFromEtenDOS-cht.json"),
]
),
.testTarget(

View File

@ -64,8 +64,9 @@ public extension LMAssembly {
lmUserOverride = .init(dataURL: uomDataURL)
}
public func setOptions(handler: (inout Config) -> Void) {
/// Hands this instance's `config` to `handler` as an `inout` value so the caller can adjust it in place.
/// - Parameter handler: Closure that receives the configuration for mutation.
/// - Returns: `self`, so the call can be chained; the result may be ignored (`@discardableResult`).
@discardableResult public func setOptions(handler: (inout Config) -> Void) -> LMInstantiator {
handler(&config)
return self
}
@discardableResult public static func connectSQLDB(dbPath: String, dropPreviousConnection: Bool = true) -> Bool {
@ -97,6 +98,7 @@ public extension LMAssembly {
// currentCassetteMetadata
static var lmCassette = LMCassette()
static var lmPlainBopomofo = LMPlainBopomofo()
// 使
// 使使
@ -111,7 +113,6 @@ public extension LMAssembly {
)
var lmReplacements = LMReplacements()
var lmAssociates = LMAssociates()
var lmPlainBopomofo = LMPlainBopomofo()
//
var lmUserOverride: LMUserOverride
@ -189,23 +190,6 @@ public extension LMAssembly {
}
}
/// Reloads `lmPlainBopomofo` from the JSON resource shipped in `Bundle.module`.
///
/// The resource name depends on `isCHS`. When the resource cannot be located the
/// failure is logged and nothing else happens. Otherwise the reload itself is
/// dispatched asynchronously onto the main queue, where the file is checked for
/// readability before the existing data is cleared and the file is opened.
public func loadSCPCSequencesData() {
  let fileName = isCHS ? "sequenceDataFromEtenDOS-chs" : "sequenceDataFromEtenDOS-cht"
  guard let path = Bundle.module.path(forResource: fileName, ofType: "json") else {
    vCLog("lmPlainBopomofo: File name access failure: \(fileName)")
    return
  }
  DispatchQueue.main.async {
    // Bail out early (with a log entry) when the file cannot be read.
    guard FileManager.default.isReadableFile(atPath: path) else {
      vCLog("lmPlainBopomofo: File access failure: \(path)")
      return
    }
    self.lmPlainBopomofo.clear()
    self.lmPlainBopomofo.open(path)
    vCLog("lmPlainBopomofo: \(self.lmPlainBopomofo.count) entries of data loaded from: \(path)")
  }
}
/// Forwards the `isLoaded` flag of the type-level `lmCassette`.
public var isCassetteDataLoaded: Bool { Self.lmCassette.isLoaded }
public static func loadCassetteData(path: String) {
DispatchQueue.main.async {
@ -340,7 +324,9 @@ public extension LMAssembly {
// 使
if config.isSCPCEnabled {
rawAllUnigrams += lmPlainBopomofo.valuesFor(key: keyChain).map { Megrez.Unigram(value: $0, score: 0) }
rawAllUnigrams += Self.lmPlainBopomofo.valuesFor(key: keyChain, isCHS: isCHS).map {
Megrez.Unigram(value: $0, score: 0)
}
}
// reversed 使

View File

@ -9,60 +9,35 @@
import Foundation
import Shared
public extension LMAssembly {
@frozen struct LMPlainBopomofo {
public private(set) var filePath: String?
var dataMap: [String: String] = [:]
extension LMAssembly {
struct LMPlainBopomofo {
@usableFromInline typealias DataMap = [String: [String: String]]
let dataMap: DataMap
/// Number of top-level keys currently held in `dataMap`.
public var count: Int { dataMap.count }
public init() {
dataMap = [:]
do {
let rawData = jsnEtenDosSequence.data(using: .utf8) ?? .init([])
let rawJSON = try JSONDecoder().decode([String: [String: String]].self, from: rawData)
dataMap = rawJSON
} catch {
vCLog("\(error)")
vCLog("↑ Exception happened when parsing raw JSON sequence data from vChewing LMAssembly.")
dataMap = [:]
}
}
/// `true` as soon as `dataMap` holds at least one entry.
public var isLoaded: Bool {
  dataMap.isEmpty == false
}
/// Reads the JSON file at `path` and installs its contents as `dataMap`.
///
/// Nothing is read when data is already loaded. On a read or decode failure the
/// previously recorded `filePath` is restored and the error is logged.
/// - Parameter path: Filesystem path of a JSON file decodable as `[String: String]`.
/// - Returns: `true` when the file was decoded and adopted; `false` when data was
///   already loaded or the file could not be read or decoded.
@discardableResult public mutating func open(_ path: String) -> Bool {
  guard !isLoaded else { return false }
  let previousPath = filePath
  filePath = nil
  do {
    let fileURL = URL(fileURLWithPath: path)
    let contents = try Data(contentsOf: fileURL)
    dataMap = try JSONDecoder().decode([String: String].self, from: contents)
    filePath = path
    return true
  } catch {
    // Roll back to the path that was recorded before this attempt.
    filePath = previousPath
    vCLog("\(error)")
    vCLog("↑ Exception happened when reading JSON file at: \(path).")
    return false
  }
}
/// Empties `dataMap` and forgets the recorded `filePath`.
public mutating func clear() {
  dataMap.removeAll()
  filePath = nil
}
/// Serializes `dataMap` as a binary property list and writes it to the location in `filePath`.
///
/// Returns without doing anything when `filePath` is `nil` or cannot be turned into a URL.
/// Serialization and write errors are logged through `vCLog` and otherwise ignored.
public func saveData() {
// NOTE(review): `URL(string:)` parses `filePath` as a URL string, whereas `open(_:)` builds its URL
// from the same kind of value with `URL(fileURLWithPath:)` — confirm which one is intended here.
guard let filePath = filePath, let plistURL = URL(string: filePath) else { return }
do {
// NOTE(review): this emits a property list, while `open(_:)` decodes JSON — confirm the two
// are really meant to target the same file.
let plistData = try PropertyListSerialization.data(fromPropertyList: dataMap, format: .binary, options: 0)
try plistData.write(to: plistURL)
} catch {
vCLog("Failed to save current database to: \(filePath)")
}
}
public func valuesFor(key: String) -> [String] {
public func valuesFor(key: String, isCHS: Bool) -> [String] {
var pairs: [String] = []
if let arrRangeRecords: String = dataMap[key]?.trimmingCharacters(in: .newlines) {
let subKey = isCHS ? "S" : "T"
if let arrRangeRecords: String = dataMap[key]?[subKey] {
pairs.append(contentsOf: arrRangeRecords.map(\.description))
}
return pairs.deduplicated
//
return pairs
}
/// Tells whether `dataMap` has an entry stored under `key`.
public func hasValuesFor(key: String) -> Bool {
  dataMap[key] != nil
}

View File

@ -0,0 +1,36 @@
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Foundation
import XCTest
@testable import LangModelAssembly
/// Tests for lookups served through `LMAssembly.LMInstantiator` with `isSCPCEnabled` switched on.
final class LMPlainBPMFTests: XCTestCase {
/// Queries three readings on an `isCHS: false` instance and then on an `isCHS: true` instance,
/// comparing the first three returned unigram values of each query against fixed expectations.
func testLMPlainBPMFDataQuery() throws {
// Instance created with `isCHS: false`; `setOptions` returns the instance, so it can be chained here.
let instance1 = LMAssembly.LMInstantiator(isCHS: false).setOptions { config in
config.isSCPCEnabled = true
}
// Only the leading three values of each result are kept for comparison.
var liu2 = instance1.unigramsFor(keyArray: ["ㄌㄧㄡˊ"]).map(\.value).prefix(3)
var bao3 = instance1.unigramsFor(keyArray: ["ㄅㄠˇ"]).map(\.value).prefix(3)
var jie2 = instance1.unigramsFor(keyArray: ["ㄐㄧㄝˊ"]).map(\.value).prefix(3)
// NOTE(review): the expected literals read as empty strings in this view — presumably the
// original characters were lost in extraction; confirm against the repository.
XCTAssertEqual(liu2, ["", "", ""])
XCTAssertEqual(bao3, ["", "", ""])
XCTAssertEqual(jie2, ["", "", ""])
// Same three queries against an instance created with `isCHS: true`.
let instance2 = LMAssembly.LMInstantiator(isCHS: true).setOptions { config in
config.isSCPCEnabled = true
}
liu2 = instance2.unigramsFor(keyArray: ["ㄌㄧㄡˊ"]).map(\.value).prefix(3)
bao3 = instance2.unigramsFor(keyArray: ["ㄅㄠˇ"]).map(\.value).prefix(3)
jie2 = instance2.unigramsFor(keyArray: ["ㄐㄧㄝˊ"]).map(\.value).prefix(3)
// NOTE(review): same caveat as above — the expected literals look damaged; confirm.
XCTAssertEqual(liu2, ["", "", ""])
XCTAssertEqual(bao3, ["", "", ""])
XCTAssertEqual(jie2, ["", "", ""])
}
}

View File

@ -81,7 +81,6 @@ public class LMMgr {
if PrefMgr.shared.associatedPhrasesEnabled { Self.loadUserAssociatesData() }
if PrefMgr.shared.phraseReplacementEnabled { Self.loadUserPhraseReplacement() }
if PrefMgr.shared.useSCPCTypingMode { Self.loadSCPCSequencesData() }
CandidateNode.load(url: Self.userSymbolMenuDataURL())
return
@ -125,12 +124,6 @@ public class LMMgr {
}
}
/// Calls `loadSCPCSequencesData()` on the language model of every case in `Shared.InputMode.validCases`.
public static func loadSCPCSequencesData() {
  for mode in Shared.InputMode.validCases {
    mode.langModel.loadSCPCSequencesData()
  }
}
/// Forwards the path of the `.theFilter` user dictionary URL for `mode` to that mode's
/// language model through `reloadUserFilterDirectly(path:)`.
/// - Parameter mode: Input mode whose language model and filter file are used.
public static func reloadUserFilterDirectly(mode: Shared.InputMode) {
  let filterURL = userDictDataURL(mode: mode, type: .theFilter)
  mode.langModel.reloadUserFilterDirectly(path: filterURL.path)
}

View File

@ -337,18 +337,17 @@ import SwiftExtension
@AppProperty(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false)
public dynamic var useSCPCTypingMode: Bool {
willSet {
if newValue {
LMMgr.loadSCPCSequencesData()
LMMgr.syncLMPrefs()
}
didSet {
LMMgr.syncLMPrefs()
}
}
@AppProperty(key: UserDef.kPhraseReplacementEnabled.rawValue, defaultValue: false)
public dynamic var phraseReplacementEnabled: Bool {
willSet {
didSet {
LMMgr.syncLMPrefs()
}
willSet {
if newValue {
LMMgr.loadUserPhraseReplacement()
}
@ -357,6 +356,9 @@ import SwiftExtension
@AppProperty(key: UserDef.kAssociatedPhrasesEnabled.rawValue, defaultValue: false)
public dynamic var associatedPhrasesEnabled: Bool {
didSet {
LMMgr.syncLMPrefs()
}
willSet {
if newValue {
LMMgr.loadUserAssociatesData()

View File

@ -83,11 +83,6 @@ public extension SettingsPanesCocoa {
SpeechSputnik.shared.refreshStatus()
}
/// Interface Builder action: triggers `LMMgr.loadSCPCSequencesData()` only while
/// `PrefMgr.shared.useSCPCTypingMode` is on; the sending control is ignored.
@IBAction func updateSCPCSettingsAction(_: NSControl) {
  if PrefMgr.shared.useSCPCTypingMode {
    LMMgr.loadSCPCSequencesData()
  }
}
@IBAction func updateUiLanguageAction(_ sender: NSPopUpButton) {
let language = languages[sender.indexOfSelectedItem]
guard let bundleID = Bundle.main.bundleIdentifier, bundleID.contains("vChewing") else {

View File

@ -98,12 +98,7 @@ public struct VwrSettingsPaneGeneral: View {
UserDef.kKeepReadingUponCompositionError.bind($keepReadingUponCompositionError).render()
UserDef.kClassicHaninKeyboardSymbolModeShortcutEnabled
.bind($classicHaninKeyboardSymbolModeShortcutEnabled).render()
UserDef.kUseSCPCTypingMode.bind(
$useSCPCTypingMode.didChange {
guard useSCPCTypingMode else { return }
LMMgr.loadSCPCSequencesData()
}
).render()
UserDef.kUseSCPCTypingMode.bind($useSCPCTypingMode).render()
if Date.isTodayTheDate(from: 0401) {
UserDef.kShouldNotFartInLieuOfBeep.bind(
$shouldNotFartInLieuOfBeep.didChange { onFartControlChange() }