vChewing-macOS/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/SubLMs/lmCassette.swift

134 lines
5.9 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Foundation
import LineReader
import Megrez
import Shared
extension vChewingLM {
  /// A sub language model backed by a CIN-format table ("cassette") file.
  ///
  /// `open(_:)` reads the table's metadata directives, its `%keyname` section and
  /// its `%chardef` section into memory; `clear()` discards everything again.
  public class LMCassette {
    /// Name taken from the `%ename` directive.
    public private(set) var nameENG: String = ""
    /// Name taken from the `%cname` directive.
    public private(set) var nameCJK: String = ""
    /// Length of the longest key found in the `%chardef` section (never below 1).
    public private(set) var maxKeyLength: Int = 1
    /// The characters of the `%selkey` value, one string per character.
    public private(set) var selectionKeys: [String] = []
    /// The characters of the `%endkey` value, one string per character.
    public private(set) var endKeys: [String] = []
    /// Entries of the `%keyname` section: key -> display name.
    public private(set) var keyNameMap: [String: String] = [:]
    /// Entries of the `%chardef` section: key sequence -> values in file order.
    public private(set) var charDefMap: [String: [String]] = [:]

    /// Number of distinct key sequences in the character definition map.
    public var count: Int { charDefMap.count }

    /// Whether any character definitions are currently held.
    public var isLoaded: Bool { !charDefMap.isEmpty }

    /// Every key listed in the `%keyname` section plus the space key.
    public var allowedKeys: [String] { Array(keyNameMap.keys + [" "]).deduplicated }

    /// Maps a key to its display name from the `%keyname` section.
    /// - Parameter char: The key to look up.
    /// - Returns: The mapped display name, or `char` itself when there is no mapping.
    public func convertKeyToDisplay(char: String) -> String {
      keyNameMap[char] ?? char
    }

    /// Loads a CIN table file into this instance.
    ///
    /// - Note: What is read from the file:
    ///   - `%ename`: stored in `nameENG`. When the value is a `;`-separated list of
    ///     `name:locale` pairs, the first pair whose locale part contains `en` wins;
    ///     otherwise the whole value is used.
    ///   - `%cname`: stored in `nameCJK`.
    ///   - `%selkey` / `%endkey`: split into single characters.
    ///   - `%keyname begin` … `%keyname end`: key -> display name pairs.
    ///   - `%chardef begin` … `%chardef end`: key sequence -> value pairs.
    ///
    ///   Only lines consisting of exactly two cells are used. Cells are separated by
    ///   tabs when the line contains a tab, otherwise by spaces.
    /// - Parameter path: Path of the CIN file.
    /// - Returns: `true` when the file has been read through; `false` when data is
    ///   already loaded, the file is missing, or it cannot be opened.
    @discardableResult public func open(_ path: String) -> Bool {
      if isLoaded { return false }
      guard FileManager.default.fileExists(atPath: path) else {
        vCLog("CIN Loading Failed: File Missing.")
        return false
      }
      do {
        guard let fileHandle = FileHandle(forReadingAtPath: path) else {
          throw FileErrors.fileHandleError("")
        }
        let lineReader = try LineReader(file: fileHandle)
        // A previous load may have stored metadata or key names without any
        // character definitions (so `isLoaded` stayed false). Start from a clean
        // slate so that data from two different files never gets mixed.
        clear()
        var theMaxKeyLength = 1
        var loadingKeys = false
        var loadingCharDefinitions = false
        for strLine in lineReader {
          let hasTab = strLine.contains("\t")
          // Directive patterns below are written with a space after the directive
          // name. Tables may separate the directive from its argument with a tab
          // instead, so match against a copy in which tabs are turned into spaces.
          let probe =
            (hasTab && strLine.contains("%")) ? strLine.replacingOccurrences(of: "\t", with: " ") : strLine
          if !loadingKeys, probe.contains("%keyname begin") { loadingKeys = true }
          if loadingKeys, probe.contains("%keyname end") { loadingKeys = false }
          if !loadingCharDefinitions, probe.contains("%chardef begin") { loadingCharDefinitions = true }
          if loadingCharDefinitions, probe.contains("%chardef end") { loadingCharDefinitions = false }
          let cells: [String.SubSequence] =
            hasTab ? strLine.split(separator: "\t") : strLine.split(separator: " ")
          guard cells.count == 2 else { continue }
          if loadingKeys, !cells[0].contains("%keyname") {
            keyNameMap[String(cells[0])] = String(cells[1])
          } else if loadingCharDefinitions, !strLine.contains("%chardef") {
            theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
            charDefMap[String(cells[0]), default: []].append(String(cells[1]))
          }
          // Metadata directives are only honoured outside of both sections.
          guard !loadingKeys, !loadingCharDefinitions else { continue }
          if nameENG.isEmpty, probe.contains("%ename ") {
            for neta in cells[1].components(separatedBy: ";") {
              let subNetaGroup = neta.components(separatedBy: ":")
              if subNetaGroup.count == 2, subNetaGroup[1].contains("en") {
                nameENG = String(subNetaGroup[0])
                break
              }
            }
            if nameENG.isEmpty { nameENG = String(cells[1]) }
          }
          if nameCJK.isEmpty, probe.contains("%cname ") { nameCJK = String(cells[1]) }
          if selectionKeys.isEmpty, probe.contains("%selkey ") {
            selectionKeys = cells[1].map { String($0) }.deduplicated
          }
          if endKeys.isEmpty, probe.contains("%endkey ") {
            endKeys = cells[1].map { String($0) }.deduplicated
          }
        }
        maxKeyLength = theMaxKeyLength
        return true
      } catch {
        vCLog("CIN Loading Failed: File Access Error.")
        return false
      }
    }

    /// Discards all loaded data and restores the initial state.
    public func clear() {
      keyNameMap.removeAll()
      charDefMap.removeAll()
      nameENG.removeAll()
      nameCJK.removeAll()
      selectionKeys.removeAll()
      endKeys.removeAll()
      maxKeyLength = 1
    }

    /// Returns the unigrams defined for the given key sequence.
    ///
    /// Scores follow the order of the definitions: the first value gets `0`, and
    /// each following value is `0.001` lower than the one before it.
    /// - Parameter key: The key sequence to look up.
    /// - Returns: One unigram per value after `deduplicated` is applied, or an
    ///   empty array when the key has no definitions.
    public func unigramsFor(key: String) -> [Megrez.Unigram] {
      guard let arrRaw = charDefMap[key]?.deduplicated, !arrRaw.isEmpty else { return [] }
      return arrRaw.enumerated().map { index, value in
        Megrez.Unigram(value: value, score: Double(index) * -0.001)
      }
    }

    /// Tells whether the given key sequence has any definition.
    /// - Parameter key: The key sequence to look up.
    /// - Returns: `true` when the character definition map has an entry for `key`.
    public func hasUnigramsFor(key: String) -> Bool {
      charDefMap[key] != nil
    }
  }
}