vChewing-macOS/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/SubLMs/lmRevLookup.swift

77 lines
2.9 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Foundation
import Shared
public extension vChewingLM {
/// Reverse-lookup table that maps a key string (the `kanji` passed to `query(with:)`)
/// to the list of readings recorded for it.
///
/// Readings are stored with every phonabet (Bopomofo) symbol encoded as a single ASCII
/// character; `query(with:)` decodes them through `restorePhonabetFromASCII(_:)`.
@frozen struct LMRevLookup {
  /// The loaded lookup table. Values are the still-encoded readings exactly as read from the source.
  public private(set) var dataMap: [String: [String]] = [:]
  /// Path the table was loaded from; stays empty when loading failed or no path was given.
  public private(set) var filePath: String = ""

  /// Creates an instance from an already-parsed dictionary.
  ///
  /// - Parameter dictData: The parsed dictionary (or `nil` if parsing failed upstream)
  ///   together with the path it came from. A `nil` dictionary is logged and leaves
  ///   the instance empty.
  public init(data dictData: (dict: [String: [String]]?, path: String)) {
    guard let theDict = dictData.dict else {
      vCLog("↑ Exception happened when reading JSON file at: \(dictData.path).")
      return
    }
    filePath = dictData.path
    dataMap = theDict
  }

  /// Creates an instance by reading and parsing the JSON file at `path`.
  ///
  /// An empty path yields an empty instance silently. A file that cannot be read, is not
  /// valid JSON, or is not shaped as `[String: [String]]` is logged and also yields an
  /// empty instance (with `filePath` left empty).
  ///
  /// - Parameter path: File-system path of the JSON file.
  public init(path: String) {
    guard !path.isEmpty else { return }
    // Read failure, parse failure and shape mismatch all report the same message,
    // so they share a single failure branch.
    guard
      let rawData = try? Data(contentsOf: URL(fileURLWithPath: path)),
      let parsed = try? JSONSerialization.jsonObject(with: rawData) as? [String: [String]]
    else {
      vCLog("↑ Exception happened when reading JSON file at: \(path).")
      return
    }
    dataMap = parsed
    filePath = path
  }

  /// Looks up the readings recorded for `kanji`.
  ///
  /// - Parameter kanji: The key to look up in `dataMap`.
  /// - Returns: The decoded, non-empty readings, or `nil` when there is no entry or
  ///   every reading decodes to an empty string.
  public func query(with kanji: String) -> [String]? {
    guard let encodedReadings = dataMap[kanji] else { return nil }
    let readings = encodedReadings
      .map { restorePhonabetFromASCII($0) }
      .filter { !$0.isEmpty }
    return readings.isEmpty ? nil : readings
  }

  /// Decodes an ASCII-encoded reading back into phonabet symbols.
  ///
  /// A string containing an ASCII underscore is treated as not being an encoded
  /// reading and is returned untouched.
  ///
  /// - Parameter incoming: The encoded reading.
  /// - Returns: `incoming` with every character that has an entry in
  ///   `dicPhonabet4ASCII` replaced by its mapped symbol; other characters are kept.
  func restorePhonabetFromASCII(_ incoming: String) -> String {
    guard !incoming.contains("_") else { return incoming }
    // Every key is a single character and no value contains a key, so one pass over
    // the characters gives the same result as substituting each table entry in turn.
    return incoming.reduce(into: "") { output, character in
      let symbol = String(character)
      output += Self.dicPhonabet4ASCII[symbol] ?? symbol
    }
  }

  // MARK: - Constants

  /// ASCII character → phonabet symbol. Entries follow the canonical Bopomofo order:
  /// 21 initials, 3 medials, 13 finals, then the tone marks keyed by "2" through "5".
  static let dicPhonabet4ASCII: [String: String] = [
    "b": "ㄅ", "p": "ㄆ", "m": "ㄇ", "f": "ㄈ", "d": "ㄉ", "t": "ㄊ", "n": "ㄋ", "l": "ㄌ", "g": "ㄍ", "k": "ㄎ", "h": "ㄏ",
    "j": "ㄐ", "q": "ㄑ", "x": "ㄒ", "Z": "ㄓ", "C": "ㄔ", "S": "ㄕ", "r": "ㄖ", "z": "ㄗ", "c": "ㄘ", "s": "ㄙ", "i": "ㄧ",
    "u": "ㄨ", "v": "ㄩ", "a": "ㄚ", "o": "ㄛ", "e": "ㄜ", "E": "ㄝ", "B": "ㄞ", "P": "ㄟ", "M": "ㄠ", "F": "ㄡ", "D": "ㄢ",
    "T": "ㄣ", "N": "ㄤ", "L": "ㄥ", "R": "ㄦ", "2": "ˊ", "3": "ˇ", "4": "ˋ", "5": "˙",
  ]
}
}