From cfe9a1ce5d2c2b6497860c44de72e735c4f90f6a Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 28 Jan 2024 00:32:23 +0800 Subject: [PATCH] LMInstantiator // Add ability for supplying NumPad results. --- .../LangModelAssembly/LMInstantiator.swift | 5 ++++ .../LMInstantiator_NumPadExtension.swift | 23 ++++++++++++++ .../NumPadDataTests.swift | 30 +++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator_NumPadExtension.swift create mode 100644 Packages/vChewing_LangModelAssembly/Tests/LangModelAssemblyTests/NumPadDataTests.swift diff --git a/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator.swift b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator.swift index ad02d0a1..486accc3 100644 --- a/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator.swift +++ b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator.swift @@ -31,6 +31,9 @@ public extension vChewingLM { /// 語言模組的相關資料的存放位置,僅藉由參數來讀取相關訊息。 class LMInstantiator: LangModelProtocol { public struct Config { + /// 如果設定為 nil 的話,則不產生任何詞頻資料。 + /// true = 全形,false = 半形。 + public var numPadFWHWStatus: Bool? 
public var isCassetteEnabled = false public var isPhraseReplacementEnabled = false public var isCNSEnabled = false @@ -338,6 +341,8 @@ public extension vChewingLM { rawAllUnigrams += lmUserPhrases.unigramsFor(key: keyChain).reversed() if !config.isCassetteEnabled || config.isCassetteEnabled && keyChain.map(\.description)[0] == "_" { + // 先給出 NumPad 的結果。 + rawAllUnigrams += supplyNumPadUnigrams(key: keyChain) // LMMisc 與 LMCore 的 score 在 (-10.0, 0.0) 這個區間內。 rawAllUnigrams += factoryUnigramsFor(key: keyChain, column: .theDataCHEW) rawAllUnigrams += factoryCoreUnigramsFor(key: keyChain) diff --git a/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator_NumPadExtension.swift b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator_NumPadExtension.swift new file mode 100644 index 00000000..7fba313f --- /dev/null +++ b/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMInstantiator_NumPadExtension.swift @@ -0,0 +1,23 @@ +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) +// ... with NTL restriction stating that: +// No trademark license is granted to use the trade names, trademarks, service +// marks, or product names of Contributor, except as required to fulfill notice +// requirements defined in MIT License. 
+ +import Foundation +import Megrez + +public extension vChewingLM.LMInstantiator { + func supplyNumPadUnigrams(key: String) -> [Megrez.Unigram] { + guard let status = config.numPadFWHWStatus else { return [] } + let initials = "_NumPad_" + guard key.hasPrefix(initials) else { return [] } + let char = key.replacingOccurrences(of: initials, with: "") + guard char.count == 1 else { return [] } + let gram1 = Megrez.Unigram(value: char.applyingTransformFW2HW(reverse: status), score: 0) + let gram2 = Megrez.Unigram(value: char.applyingTransformFW2HW(reverse: !status), score: -0.1) + return [gram1, gram2] + } +} diff --git a/Packages/vChewing_LangModelAssembly/Tests/LangModelAssemblyTests/NumPadDataTests.swift b/Packages/vChewing_LangModelAssembly/Tests/LangModelAssemblyTests/NumPadDataTests.swift new file mode 100644 index 00000000..46e6e698 --- /dev/null +++ b/Packages/vChewing_LangModelAssembly/Tests/LangModelAssemblyTests/NumPadDataTests.swift @@ -0,0 +1,30 @@ +//// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// ==================== +// This code is released under the MIT license (SPDX-License-Identifier: MIT) +// ... with NTL restriction stating that: +// No trademark license is granted to use the trade names, trademarks, service +// marks, or product names of Contributor, except as required to fulfill notice +// requirements defined in MIT License. 
+ +import Foundation +import XCTest + +@testable import LangModelAssembly + +final class LMInstantiatorNumericPadTests: XCTestCase { + func testSQL() throws { + let instance = vChewingLM.LMInstantiator(isCHS: true) + instance.setOptions { config in + config.numPadFWHWStatus = nil + } + XCTAssertEqual(instance.unigramsFor(keyArray: ["_NumPad_0"]).description, "[]") + instance.setOptions { config in + config.numPadFWHWStatus = true + } + XCTAssertEqual(instance.unigramsFor(keyArray: ["_NumPad_0"]).description, "[(０,0.0), (0,-0.1)]") + instance.setOptions { config in + config.numPadFWHWStatus = false + } + XCTAssertEqual(instance.unigramsFor(keyArray: ["_NumPad_0"]).description, "[(0,0.0), (０,-0.1)]") + } +}