LMAssembly // Integrate EtenDOS SCPC data into the codebase.
This commit is contained in:
parent
e44843e603
commit
3ebb5f2f48
|
@ -26,10 +26,6 @@ let package = Package(
|
|||
.product(name: "Megrez", package: "vChewing_Megrez"),
|
||||
.product(name: "Shared", package: "vChewing_Shared"),
|
||||
.product(name: "PinyinPhonaConverter", package: "vChewing_PinyinPhonaConverter"),
|
||||
],
|
||||
resources: [
|
||||
.process("Resources/sequenceDataFromEtenDOS-chs.json"),
|
||||
.process("Resources/sequenceDataFromEtenDOS-cht.json"),
|
||||
]
|
||||
),
|
||||
.testTarget(
|
||||
|
|
|
@ -64,8 +64,9 @@ public extension LMAssembly {
|
|||
lmUserOverride = .init(dataURL: uomDataURL)
|
||||
}
|
||||
|
||||
public func setOptions(handler: (inout Config) -> Void) {
|
||||
@discardableResult public func setOptions(handler: (inout Config) -> Void) -> LMInstantiator {
|
||||
handler(&config)
|
||||
return self
|
||||
}
|
||||
|
||||
@discardableResult public static func connectSQLDB(dbPath: String, dropPreviousConnection: Bool = true) -> Bool {
|
||||
|
@ -97,6 +98,7 @@ public extension LMAssembly {
|
|||
|
||||
// 磁帶資料模組。「currentCassette」對外唯讀,僅用來讀取磁帶本身的中繼資料(Metadata)。
|
||||
static var lmCassette = LMCassette()
|
||||
static var lmPlainBopomofo = LMPlainBopomofo()
|
||||
|
||||
// 聲明使用者語言模組。
|
||||
// 使用者語言模組使用多執行緒的話,可能會導致一些問題。有時間再仔細排查看看。
|
||||
|
@ -111,7 +113,6 @@ public extension LMAssembly {
|
|||
)
|
||||
var lmReplacements = LMReplacements()
|
||||
var lmAssociates = LMAssociates()
|
||||
var lmPlainBopomofo = LMPlainBopomofo()
|
||||
|
||||
// 半衰记忆模组
|
||||
var lmUserOverride: LMUserOverride
|
||||
|
@ -189,23 +190,6 @@ public extension LMAssembly {
|
|||
}
|
||||
}
|
||||
|
||||
/// Loads the bundled SCPC sequence table that matches this instance's script variant.
///
/// Picks `sequenceDataFromEtenDOS-chs` when `isCHS` is true and
/// `sequenceDataFromEtenDOS-cht` otherwise, resolves the JSON file inside
/// `Bundle.module`, and then replaces the contents of `lmPlainBopomofo` on the
/// main queue. Every failure is reported through `vCLog`; nothing is thrown.
public func loadSCPCSequencesData() {
  let fileName = isCHS ? "sequenceDataFromEtenDOS-chs" : "sequenceDataFromEtenDOS-cht"
  guard let path = Bundle.module.path(forResource: fileName, ofType: "json") else {
    vCLog("lmPlainBopomofo: File name access failure: \(fileName)")
    return
  }
  DispatchQueue.main.async {
    guard FileManager.default.isReadableFile(atPath: path) else {
      vCLog("lmPlainBopomofo: File access failure: \(path)")
      return
    }
    // Clear before opening so that the fresh file always replaces the old table.
    self.lmPlainBopomofo.clear()
    // Report success only when the load really succeeded; previously the
    // "entries loaded" line was printed even after a failed `open(_:)`.
    guard self.lmPlainBopomofo.open(path) else { return }
    vCLog("lmPlainBopomofo: \(self.lmPlainBopomofo.count) entries of data loaded from: \(path)")
  }
}
|
||||
|
||||
public var isCassetteDataLoaded: Bool { Self.lmCassette.isLoaded }
|
||||
public static func loadCassetteData(path: String) {
|
||||
DispatchQueue.main.async {
|
||||
|
@ -340,7 +324,9 @@ public extension LMAssembly {
|
|||
|
||||
// 如果有檢測到使用者自訂逐字選字語料庫內的相關資料的話,在這裡先插入。
|
||||
if config.isSCPCEnabled {
|
||||
rawAllUnigrams += lmPlainBopomofo.valuesFor(key: keyChain).map { Megrez.Unigram(value: $0, score: 0) }
|
||||
rawAllUnigrams += Self.lmPlainBopomofo.valuesFor(key: keyChain, isCHS: isCHS).map {
|
||||
Megrez.Unigram(value: $0, score: 0)
|
||||
}
|
||||
}
|
||||
|
||||
// 用 reversed 指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -9,60 +9,35 @@
|
|||
import Foundation
|
||||
import Shared
|
||||
|
||||
public extension LMAssembly {
|
||||
@frozen struct LMPlainBopomofo {
|
||||
public private(set) var filePath: String?
|
||||
var dataMap: [String: String] = [:]
|
||||
extension LMAssembly {
|
||||
struct LMPlainBopomofo {
|
||||
@usableFromInline typealias DataMap = [String: [String: String]]
|
||||
let dataMap: DataMap
|
||||
|
||||
public var count: Int { dataMap.count }
|
||||
|
||||
public init() {
|
||||
dataMap = [:]
|
||||
do {
|
||||
let rawData = jsnEtenDosSequence.data(using: .utf8) ?? .init([])
|
||||
let rawJSON = try JSONDecoder().decode([String: [String: String]].self, from: rawData)
|
||||
dataMap = rawJSON
|
||||
} catch {
|
||||
vCLog("\(error)")
|
||||
vCLog("↑ Exception happened when parsing raw JSON sequence data from vChewing LMAssembly.")
|
||||
dataMap = [:]
|
||||
}
|
||||
}
|
||||
|
||||
public var isLoaded: Bool { !dataMap.isEmpty }
|
||||
|
||||
/// Reads a JSON dictionary (`[String: String]`) from `path` into `dataMap`.
///
/// - Parameter path: File-system path of the JSON file to read.
/// - Returns: `true` when the file was read and decoded; `false` when data is
///   already loaded or when reading / decoding failed (the error is logged).
@discardableResult public mutating func open(_ path: String) -> Bool {
  // Refuse to load on top of an existing table; callers must `clear()` first.
  guard !isLoaded else { return false }
  let previousPath = filePath
  filePath = nil

  do {
    let fileURL = URL(fileURLWithPath: path)
    let jsonData = try Data(contentsOf: fileURL)
    dataMap = try JSONDecoder().decode([String: String].self, from: jsonData)
  } catch {
    // Put the remembered path back so a failed attempt leaves no trace.
    filePath = previousPath
    vCLog("\(error)")
    vCLog("↑ Exception happened when reading JSON file at: \(path).")
    return false
  }

  filePath = path
  return true
}
|
||||
|
||||
/// Empties the loaded table and forgets the path it was read from.
public mutating func clear() {
  dataMap.removeAll(keepingCapacity: false)
  filePath = nil
}
|
||||
|
||||
/// Writes `dataMap` to the file at `filePath` as a binary property list.
///
/// Does nothing when no file path is remembered. Failures are logged through
/// `vCLog` and are not propagated to the caller.
public func saveData() {
  guard let filePath = filePath else { return }
  // `filePath` is a plain file-system path, so it must be turned into a file
  // URL. `URL(string:)` would produce a scheme-less URL (or nil for paths with
  // spaces / non-ASCII characters), which `Data.write(to:)` cannot write to.
  let plistURL = URL(fileURLWithPath: filePath)
  do {
    // NOTE(review): this emits a binary plist, whereas `open(_:)` decodes JSON —
    // confirm whether the two formats are meant to differ.
    let plistData = try PropertyListSerialization.data(fromPropertyList: dataMap, format: .binary, options: 0)
    try plistData.write(to: plistURL)
  } catch {
    vCLog("\(error)")
    vCLog("Failed to save current database to: \(filePath)")
  }
}
|
||||
|
||||
public func valuesFor(key: String) -> [String] {
|
||||
public func valuesFor(key: String, isCHS: Bool) -> [String] {
|
||||
var pairs: [String] = []
|
||||
if let arrRangeRecords: String = dataMap[key]?.trimmingCharacters(in: .newlines) {
|
||||
let subKey = isCHS ? "S" : "T"
|
||||
if let arrRangeRecords: String = dataMap[key]?[subKey] {
|
||||
pairs.append(contentsOf: arrRangeRecords.map(\.description))
|
||||
}
|
||||
return pairs.deduplicated
|
||||
// 這裡不做去重複處理,因為倚天中文系統注音排序適應者們已經形成了肌肉記憶。
|
||||
return pairs
|
||||
}
|
||||
|
||||
/// Tells whether the table holds an entry for `key`.
public func hasValuesFor(key: String) -> Bool {
  dataMap[key] != nil
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,36 @@
|
|||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License).
|
||||
// ====================
|
||||
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
|
||||
// ... with NTL restriction stating that:
|
||||
// No trademark license is granted to use the trade names, trademarks, service
|
||||
// marks, or product names of Contributor, except as required to fulfill notice
|
||||
// requirements defined in MIT License.
|
||||
|
||||
import Foundation
|
||||
import XCTest
|
||||
|
||||
@testable import LangModelAssembly
|
||||
|
||||
/// Checks the candidate order returned for single readings when SCPC mode is enabled.
final class LMPlainBPMFTests: XCTestCase {
  /// Queries three readings against an `isCHS: false` instantiator and then an
  /// `isCHS: true` one (SCPC enabled on both) and compares the first three
  /// candidate values of each reading with the pinned expectations.
  func testLMPlainBPMFDataQuery() throws {
    /// Returns the first three candidate values `model` offers for one reading.
    func topThree(_ reading: String, from model: LMAssembly.LMInstantiator) -> ArraySlice<String> {
      model.unigramsFor(keyArray: [reading]).map(\.value).prefix(3)
    }

    // Pass 1: isCHS == false.
    let chtModel = LMAssembly.LMInstantiator(isCHS: false).setOptions { $0.isSCPCEnabled = true }
    let chtLiu2 = topThree("ㄌㄧㄡˊ", from: chtModel)
    let chtBao3 = topThree("ㄅㄠˇ", from: chtModel)
    let chtJie2 = topThree("ㄐㄧㄝˊ", from: chtModel)
    XCTAssertEqual(chtLiu2, ["劉", "流", "留"])
    XCTAssertEqual(chtBao3, ["保", "寶", "飽"])
    XCTAssertEqual(chtJie2, ["節", "潔", "傑"])

    // Pass 2: isCHS == true.
    let chsModel = LMAssembly.LMInstantiator(isCHS: true).setOptions { $0.isSCPCEnabled = true }
    let chsLiu2 = topThree("ㄌㄧㄡˊ", from: chsModel)
    let chsBao3 = topThree("ㄅㄠˇ", from: chsModel)
    let chsJie2 = topThree("ㄐㄧㄝˊ", from: chsModel)
    XCTAssertEqual(chsLiu2, ["刘", "流", "留"])
    XCTAssertEqual(chsBao3, ["保", "宝", "饱"])
    XCTAssertEqual(chsJie2, ["节", "洁", "杰"])
  }
}
|
|
@ -81,7 +81,6 @@ public class LMMgr {
|
|||
|
||||
if PrefMgr.shared.associatedPhrasesEnabled { Self.loadUserAssociatesData() }
|
||||
if PrefMgr.shared.phraseReplacementEnabled { Self.loadUserPhraseReplacement() }
|
||||
if PrefMgr.shared.useSCPCTypingMode { Self.loadSCPCSequencesData() }
|
||||
|
||||
CandidateNode.load(url: Self.userSymbolMenuDataURL())
|
||||
return
|
||||
|
@ -125,12 +124,6 @@ public class LMMgr {
|
|||
}
|
||||
}
|
||||
|
||||
/// Asks the language model of every valid input mode to load its SCPC sequence data.
public static func loadSCPCSequencesData() {
  for mode in Shared.InputMode.validCases {
    mode.langModel.loadSCPCSequencesData()
  }
}
|
||||
|
||||
/// Tells the language model of `mode` to reload its user filter from the path
/// of the URL that `userDictDataURL(mode:type:)` returns for `.theFilter`.
///
/// - Parameter mode: The input mode whose filter should be reloaded.
public static func reloadUserFilterDirectly(mode: Shared.InputMode) {
  let filterURL = userDictDataURL(mode: mode, type: .theFilter)
  mode.langModel.reloadUserFilterDirectly(path: filterURL.path)
}
|
||||
|
|
|
@ -337,18 +337,17 @@ import SwiftExtension
|
|||
|
||||
@AppProperty(key: UserDef.kUseSCPCTypingMode.rawValue, defaultValue: false)
|
||||
public dynamic var useSCPCTypingMode: Bool {
|
||||
willSet {
|
||||
if newValue {
|
||||
LMMgr.loadSCPCSequencesData()
|
||||
LMMgr.syncLMPrefs()
|
||||
}
|
||||
didSet {
|
||||
LMMgr.syncLMPrefs()
|
||||
}
|
||||
}
|
||||
|
||||
@AppProperty(key: UserDef.kPhraseReplacementEnabled.rawValue, defaultValue: false)
|
||||
public dynamic var phraseReplacementEnabled: Bool {
|
||||
willSet {
|
||||
didSet {
|
||||
LMMgr.syncLMPrefs()
|
||||
}
|
||||
willSet {
|
||||
if newValue {
|
||||
LMMgr.loadUserPhraseReplacement()
|
||||
}
|
||||
|
@ -357,6 +356,9 @@ import SwiftExtension
|
|||
|
||||
@AppProperty(key: UserDef.kAssociatedPhrasesEnabled.rawValue, defaultValue: false)
|
||||
public dynamic var associatedPhrasesEnabled: Bool {
|
||||
didSet {
|
||||
LMMgr.syncLMPrefs()
|
||||
}
|
||||
willSet {
|
||||
if newValue {
|
||||
LMMgr.loadUserAssociatesData()
|
||||
|
|
|
@ -83,11 +83,6 @@ public extension SettingsPanesCocoa {
|
|||
SpeechSputnik.shared.refreshStatus()
|
||||
}
|
||||
|
||||
/// Reloads the SCPC sequence data, but only while the SCPC typing mode preference is on.
@IBAction func updateSCPCSettingsAction(_: NSControl) {
  if PrefMgr.shared.useSCPCTypingMode {
    LMMgr.loadSCPCSequencesData()
  }
}
|
||||
|
||||
@IBAction func updateUiLanguageAction(_ sender: NSPopUpButton) {
|
||||
let language = languages[sender.indexOfSelectedItem]
|
||||
guard let bundleID = Bundle.main.bundleIdentifier, bundleID.contains("vChewing") else {
|
||||
|
|
|
@ -98,12 +98,7 @@ public struct VwrSettingsPaneGeneral: View {
|
|||
UserDef.kKeepReadingUponCompositionError.bind($keepReadingUponCompositionError).render()
|
||||
UserDef.kClassicHaninKeyboardSymbolModeShortcutEnabled
|
||||
.bind($classicHaninKeyboardSymbolModeShortcutEnabled).render()
|
||||
UserDef.kUseSCPCTypingMode.bind(
|
||||
$useSCPCTypingMode.didChange {
|
||||
guard useSCPCTypingMode else { return }
|
||||
LMMgr.loadSCPCSequencesData()
|
||||
}
|
||||
).render()
|
||||
UserDef.kUseSCPCTypingMode.bind($useSCPCTypingMode).render()
|
||||
if Date.isTodayTheDate(from: 0401) {
|
||||
UserDef.kShouldNotFartInLieuOfBeep.bind(
|
||||
$shouldNotFartInLieuOfBeep.didChange { onFartControlChange() }
|
||||
|
|
Loading…
Reference in New Issue