vChewing-macOS/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/SubLMs/lmAssociates.swift

126 lines
4.1 KiB
Swift

// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Megrez
import PinyinPhonaConverter
import Shared
public extension vChewingLM {
@frozen struct LMAssociates {
public private(set) var filePath: String?
var rangeMap: [String: [(Range<String.Index>, Int)]] = [:]
var strData: String = ""
public var count: Int { rangeMap.count }
public init() {
rangeMap = [:]
}
public var isLoaded: Bool { !rangeMap.isEmpty }
internal static func cnvNGramKeyFromPinyinToPhona(target: String) -> String {
guard target.contains("("), target.contains(","), target.contains(")") else {
return target
}
let arrTarget = target.dropLast().dropFirst().split(separator: ",")
guard arrTarget.count == 2 else { return target }
var arrTarget0 = String(arrTarget[0]).lowercased()
arrTarget0.convertToPhonabets()
return "(\(arrTarget0),\(arrTarget[1]))"
}
@discardableResult public mutating func open(_ path: String) -> Bool {
if isLoaded { return false }
let oldPath = filePath
filePath = nil
LMConsolidator.fixEOF(path: path)
LMConsolidator.consolidate(path: path, pragma: true)
do {
let rawStrData = try String(contentsOfFile: path, encoding: .utf8)
replaceData(textData: rawStrData)
} catch {
filePath = oldPath
vCLog("\(error)")
vCLog("↑ Exception happened when reading data at: \(path).")
return false
}
filePath = path
return true
}
///
/// - parameters:
/// - path:
public mutating func replaceData(textData rawStrData: String) {
if strData == rawStrData { return }
strData = rawStrData
var newMap: [String: [(Range<String.Index>, Int)]] = [:]
strData.parse(splitee: "\n") { theRange in
let theCells = rawStrData[theRange].split(separator: " ")
if theCells.count >= 2 {
let theKey = theCells[0].description
if theKey.first != "#" {
for (i, _) in theCells.enumerated() {
if i == 0 { continue }
if theCells[i].first == "#" { continue }
let newKey = Self.cnvNGramKeyFromPinyinToPhona(target: theKey)
newMap[newKey, default: []].append((theRange, i))
}
}
}
}
rangeMap = newMap
newMap.removeAll()
}
public mutating func clear() {
filePath = nil
strData.removeAll()
rangeMap.removeAll()
}
public func saveData() {
guard let filePath = filePath else { return }
do {
try strData.write(toFile: filePath, atomically: true, encoding: .utf8)
} catch {
vCLog("Failed to save current database to: \(filePath)")
}
}
public func valuesFor(pair: Megrez.Compositor.KeyValuePaired) -> [String] {
var pairs: [String] = []
if let arrRangeRecords: [(Range<String.Index>, Int)] = rangeMap[pair.toNGramKey] {
for (netaRange, index) in arrRangeRecords {
let neta = strData[netaRange].split(separator: " ")
let theValue: String = .init(neta[index])
pairs.append(theValue)
}
}
if let arrRangeRecords: [(Range<String.Index>, Int)] = rangeMap[pair.value] {
for (netaRange, index) in arrRangeRecords {
let neta = strData[netaRange].split(separator: " ")
let theValue: String = .init(neta[index])
pairs.append(theValue)
}
}
var set = Set<String>()
return pairs.filter { set.insert($0).inserted }
}
public func hasValuesFor(pair: Megrez.Compositor.KeyValuePaired) -> Bool {
if rangeMap[pair.toNGramKey] != nil { return true }
return rangeMap[pair.value] != nil
}
}
}