LMCassette // Refactor && Fix .clear().

This commit is contained in:
ShikiSuen 2023-12-31 01:21:22 +08:00
parent 4317c9c653
commit 1c92ab8edf
4 changed files with 278 additions and 328 deletions

View File

@ -40,21 +40,24 @@ public extension vChewingLM {
public private(set) var areCandidateKeysShiftHeld: Bool = false
public private(set) var supplyQuickResults: Bool = false
public private(set) var supplyPartiallyMatchedResults: Bool = false
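/// Base of the exponential, key-length-dependent weight applied to octagram counts when accumulating `norm`.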
private static let fscale = 2.7
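/// NORM: running total of key-length-weighted octagram counts, accumulated while the octagram section is loaded.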
private var norm = 0.0
}
}
public extension vChewingLM.LMCassette {
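/// fscale: base of the exponential, key-length-dependent weight applied to octagram counts when accumulating `norm`.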
private static let fscale = 2.7
/// The wildcard symbol: `wildcardKey` when it is non-empty, otherwise a fallback literal.
public var wildcard: String { wildcardKey.isEmpty ? "" : wildcardKey }
var wildcard: String { wildcardKey.isEmpty ? "" : wildcardKey }
/// Number of keys currently held in the charDef map.
public var count: Int { charDefMap.count }
var count: Int { charDefMap.count }
/// Whether data has been loaded, i.e. whether the charDef map is non-empty.
public var isLoaded: Bool { !charDefMap.isEmpty }
var isLoaded: Bool { !charDefMap.isEmpty }
/// Allowed keys: every key of the key-name map plus `" "`, with duplicates removed.
public var allowedKeys: [String] { Array(keyNameMap.keys + [" "]).deduplicated }
var allowedKeys: [String] { Array(keyNameMap.keys + [" "]).deduplicated }
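/// Returns the display name registered for `char` in the key-name map, or `char` itself when none is registered.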
public func convertKeyToDisplay(char: String) -> String {
func convertKeyToDisplay(char: String) -> String {
keyNameMap[char] ?? char
}
@ -76,87 +79,92 @@ public extension vChewingLM {
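/// Reads the file at the given path and fills this instance's maps and metadata from it.
/// - Parameter path: Path of the file to read.
/// - Returns: `false` without reading anything when data is already loaded; presumably `true` on success (the success path is not visible here — confirm).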
@discardableResult public mutating func open(_ path: String) -> Bool {
@discardableResult mutating func open(_ path: String) -> Bool {
if isLoaded { return false }
let oldPath = filePath
filePath = nil
if FileManager.default.fileExists(atPath: path) {
do {
guard let fileHandle = FileHandle(forReadingAtPath: path) else {
throw FileErrors.fileHandleError("")
throw vChewingLM.FileErrors.fileHandleError("")
}
let lineReader = try LineReader(file: fileHandle)
var theMaxKeyLength = 1
var loadingKeys = false
var loadingQuickSets = false
var loadingCharDefinitions = false
var loadingSymbolDefinitions = false
var loadingQuickSets = false {
willSet {
supplyQuickResults = true
if !newValue, quickDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingCharDefinitions = false {
willSet {
if !newValue, charDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingSymbolDefinitions = false {
willSet {
if !newValue, symbolDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingOctagramData = false
var keysUsedInCharDef: Set<String> = .init()
for strLine in lineReader {
if strLine.starts(with: "%keyname") {
if !loadingKeys, strLine.contains("begin") { loadingKeys = true }
if loadingKeys, strLine.contains("end") { loadingKeys = false }
}
let isTabDelimiting = strLine.contains("\t")
let cells = isTabDelimiting ? strLine.split(separator: "\t") : strLine.split(separator: " ")
guard cells.count >= 1 else { continue }
let strFirstCell = cells[0].trimmingCharacters(in: .newlines)
let strSecondCell = cells.count >= 2 ? cells[1].trimmingCharacters(in: .newlines) : nil
//
if strLine.first == "%", strFirstCell != "%" {
// %flag_disp_partial_match
if strLine == "%flag_disp_partial_match" {
supplyPartiallyMatchedResults = true
supplyQuickResults = true
}
// %quick
if strLine.starts(with: "%quick") {
supplyQuickResults = true
if !loadingQuickSets, strLine.contains("begin") {
loadingQuickSets = true
guard let strSecondCell = strSecondCell else { continue }
processTags: switch strFirstCell {
case "%keyname" where strSecondCell == "begin": loadingKeys = true
case "%keyname" where strSecondCell == "end": loadingKeys = false
case "%quick" where strSecondCell == "begin": loadingQuickSets = true
case "%quick" where strSecondCell == "end": loadingQuickSets = false
case "%chardef" where strSecondCell == "begin": loadingCharDefinitions = true
case "%chardef" where strSecondCell == "end": loadingCharDefinitions = false
case "%symboldef" where strSecondCell == "begin": loadingSymbolDefinitions = true
case "%symboldef" where strSecondCell == "end": loadingSymbolDefinitions = false
case "%octagram" where strSecondCell == "begin": loadingOctagramData = true
case "%octagram" where strSecondCell == "end": loadingOctagramData = false
case "%ename" where nameENG.isEmpty:
parseSubCells: for neta in strSecondCell.components(separatedBy: ";") {
let subNetaGroup = neta.components(separatedBy: ":")
guard subNetaGroup.count == 2, subNetaGroup[1].contains("en") else { continue }
nameENG = String(subNetaGroup[0])
break parseSubCells
}
if loadingQuickSets, strLine.contains("end") {
loadingQuickSets = false
if quickDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
guard nameENG.isEmpty else { break processTags }
nameENG = strSecondCell
case "%intlname" where nameIntl.isEmpty: nameIntl = strSecondCell.replacingOccurrences(of: "_", with: " ")
case "%cname" where nameCJK.isEmpty: nameCJK = strSecondCell
case "%sname" where nameShort.isEmpty: nameShort = strSecondCell
case "%nullcandidate" where nullCandidate.isEmpty: nullCandidate = strSecondCell
case "%selkey" where selectionKeys.isEmpty: selectionKeys = strSecondCell.map(\.description).deduplicated.joined()
case "%endkey" where endKeys.isEmpty: endKeys = strSecondCell.map(\.description).deduplicated
case "%wildcardkey" where wildcardKey.isEmpty: wildcardKey = strSecondCell.first?.description ?? ""
case "%keys_to_directly_commit" where keysToDirectlyCommit.isEmpty: keysToDirectlyCommit = strSecondCell
default: break processTags
}
continue
}
// %chardef
if strLine.starts(with: "%chardef") {
if !loadingCharDefinitions, strLine.contains("begin") {
loadingCharDefinitions = true
}
if loadingCharDefinitions, strLine.contains("end") {
loadingCharDefinitions = false
if charDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
// %symboldef
if strLine.starts(with: "%symboldef") {
if !loadingSymbolDefinitions, strLine.contains("begin") {
loadingSymbolDefinitions = true
}
if loadingSymbolDefinitions, strLine.contains("end") {
loadingSymbolDefinitions = false
if symbolDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
// %octagram
if strLine.starts(with: "%octagram") {
if !loadingOctagramData, strLine.contains("begin") {
loadingOctagramData = true
}
if loadingOctagramData, strLine.contains("end") {
loadingOctagramData = false
}
}
// Start data parsing.
let cells: [String.SubSequence] =
strLine.contains("\t") ? strLine.split(separator: "\t") : strLine.split(separator: " ")
guard cells.count >= 2 else { continue }
let strFirstCell = cells[0].trimmingCharacters(in: .newlines)
let strSecondCell = cells[1].trimmingCharacters(in: .newlines)
if loadingKeys, !cells[0].starts(with: "%keyname") {
keyNameMap[strFirstCell] = cells[1].trimmingCharacters(in: .newlines)
} else if loadingQuickSets, !strLine.starts(with: "%quick") {
//
guard let strSecondCell = strSecondCell else { continue }
if loadingKeys {
keyNameMap[strFirstCell] = strSecondCell.trimmingCharacters(in: .newlines)
} else if loadingQuickSets {
theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
quickDefMap[strFirstCell, default: .init()].append(strSecondCell)
} else if loadingCharDefinitions, !loadingSymbolDefinitions,
!strLine.starts(with: "%chardef"), !strLine.starts(with: "%symboldef")
{
} else if loadingCharDefinitions, !loadingSymbolDefinitions {
theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
charDefMap[strFirstCell, default: []].append(strSecondCell)
if strFirstCell.count > 1 {
@ -170,12 +178,12 @@ public extension vChewingLM {
keyComps.removeLast()
charDefWildcardMap[keyComps.joined() + wildcard, default: []].append(strSecondCell)
}
} else if loadingSymbolDefinitions, !strLine.starts(with: "%chardef"), !strLine.starts(with: "%symboldef") {
} else if loadingSymbolDefinitions {
theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
symbolDefMap[strFirstCell, default: []].append(strSecondCell)
reverseLookupMap[strSecondCell, default: []].append(strFirstCell)
} else if loadingOctagramData, !strLine.starts(with: "%octagram") {
guard let countValue = Int(cells[1]) else { continue }
} else if loadingOctagramData {
guard let countValue = Int(strSecondCell) else { continue }
switch cells.count {
case 2: octagramMap[strFirstCell] = countValue
case 3: octagramDividedMap[strFirstCell] = (countValue, cells[2].trimmingCharacters(in: .newlines))
@ -183,35 +191,6 @@ public extension vChewingLM {
}
norm += Self.fscale ** (Double(cells[0].count) / 3.0 - 1.0) * Double(countValue)
}
guard !loadingKeys, !loadingQuickSets, !loadingCharDefinitions, !loadingOctagramData else { continue }
if nameENG.isEmpty, strLine.starts(with: "%ename ") {
for neta in cells[1].components(separatedBy: ";") {
let subNetaGroup = neta.components(separatedBy: ":")
if subNetaGroup.count == 2, subNetaGroup[1].contains("en") {
nameENG = String(subNetaGroup[0])
break
}
}
if nameENG.isEmpty { nameENG = strSecondCell }
}
if nameIntl.isEmpty, strLine.starts(with: "%intlname ") {
nameIntl = strSecondCell.replacingOccurrences(of: "_", with: " ")
}
if nameCJK.isEmpty, strLine.starts(with: "%cname ") { nameCJK = strSecondCell }
if nameShort.isEmpty, strLine.starts(with: "%sname ") { nameShort = strSecondCell }
if nullCandidate.isEmpty, strLine.starts(with: "%nullcandidate ") { nullCandidate = strSecondCell }
if selectionKeys.isEmpty, strLine.starts(with: "%selkey ") {
selectionKeys = cells[1].map(\.description).deduplicated.joined()
}
if endKeys.isEmpty, strLine.starts(with: "%endkey ") {
endKeys = cells[1].map(\.description).deduplicated
}
if wildcardKey.isEmpty, strLine.starts(with: "%wildcardkey ") {
wildcardKey = cells[1].first?.description ?? ""
}
if keysToDirectlyCommit.isEmpty, strLine.starts(with: "%keys_to_directly_commit ") {
keysToDirectlyCommit = strSecondCell
}
}
// Post process.
if CandidateKey.validate(keys: selectionKeys) != nil { selectionKeys = "1234567890" }
@ -232,28 +211,11 @@ public extension vChewingLM {
return false
}
public mutating func clear() {
filePath = nil
nullCandidate.removeAll()
keyNameMap.removeAll()
quickDefMap.removeAll()
charDefMap.removeAll()
charDefWildcardMap.removeAll()
nameShort.removeAll()
nameENG.removeAll()
nameCJK.removeAll()
selectionKeys.removeAll()
endKeys.removeAll()
reverseLookupMap.removeAll()
octagramMap.removeAll()
octagramDividedMap.removeAll()
wildcardKey.removeAll()
nameIntl.removeAll()
maxKeyLength = 1
norm = 0
mutating func clear() {
self = .init()
}
public func quickSetsFor(key: String) -> String? {
func quickSetsFor(key: String) -> String? {
guard !key.isEmpty else { return nil }
var result = [String]()
if let specifiedResult = quickDefMap[key], !specifiedResult.isEmpty {
@ -280,7 +242,7 @@ public extension vChewingLM {
///
/// - parameters:
/// - key:
public func unigramsFor(key: String) -> [Megrez.Unigram] {
func unigramsFor(key: String) -> [Megrez.Unigram] {
let arrRaw = charDefMap[key]?.deduplicated ?? []
var arrRawWildcard: [String] = []
if let arrRawWildcardValues = charDefWildcardMap[key]?.deduplicated,
@ -323,7 +285,7 @@ public extension vChewingLM {
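/// Checks whether unigram data exists for a key: either a direct charDef entry, or a wildcard-map entry for a key that contains the wildcard symbol but does not start with it.
/// - parameters:
/// - key: The key to look up.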
public func hasUnigramsFor(key: String) -> Bool {
func hasUnigramsFor(key: String) -> Bool {
charDefMap[key] != nil
|| (charDefWildcardMap[key] != nil && key.contains(wildcard) && key.first?.description != wildcard)
}
@ -348,7 +310,6 @@ public extension vChewingLM {
}
return weight
}
}
}
// MARK: -

View File

@ -47,7 +47,7 @@ final class LMCassetteTests: XCTestCase {
NSLog("LMCassette: Finished loading CIN. Entries: \(lmCassette.count)")
XCTAssertFalse(lmCassette.quickDefMap.isEmpty)
print(lmCassette.quickSetsFor(key: ",.") ?? "")
XCTAssertEqual(lmCassette.keyNameMap.count, 41)
XCTAssertEqual(lmCassette.keyNameMap.count, 31)
XCTAssertEqual(lmCassette.charDefMap.count, 29491)
XCTAssertEqual(lmCassette.charDefWildcardMap.count, 11946)
XCTAssertEqual(lmCassette.octagramMap.count, 0)

View File

@ -23,9 +23,9 @@ final class LMUserOverrideTests: XCTestCase {
func testUOM_1_BasicOps() throws {
let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL)
let key = "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)"
let headReading = "ㄋㄧㄢˊ-ㄓㄨㄥ"
let expectedSuggestion = "年終"
let key = "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)"
let headReading = "ㄍㄡˇ"
let expectedSuggestion = ""
observe(who: uom, key: key, candidate: expectedSuggestion, timestamp: nowTimeStamp)
var suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", expectedSuggestion)
@ -46,10 +46,10 @@ final class LMUserOverrideTests: XCTestCase {
func testUOM_2_NewestAgainstRepeatedlyUsed() throws {
let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL)
let key = "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)"
let headReading = "ㄋㄧㄢˊ-ㄓㄨㄥ"
let valRepeatedlyUsed = "年終" //
let valNewest = "年中" //
let key = "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)"
let headReading = "ㄍㄡˇ"
let valRepeatedlyUsed = "" //
let valNewest = "" //
let stamps: [Double] = [0, 0.5, 2, 2.5, 4, 4.5, 5.3].map { nowTimeStamp + halfLife * $0 }
stamps.forEach { stamp in
observe(who: uom, key: key, candidate: valRepeatedlyUsed, timestamp: stamp)
@ -62,8 +62,6 @@ final class LMUserOverrideTests: XCTestCase {
}
//
observe(who: uom, key: key, candidate: valNewest, timestamp: nowTimeStamp + halfLife * 23.4)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 23.6, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", valNewest)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 26, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", valNewest)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 50, headReading: headReading)
@ -72,9 +70,9 @@ final class LMUserOverrideTests: XCTestCase {
}
func testUOM_3_LRUTable() throws {
let a = (key: "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)", value: "年終", head: "ㄋㄧㄢˊ-ㄓㄨㄥ")
let b = (key: "((ㄑㄧˋ-ㄧㄝˋ,企業),(ㄉㄜ˙,的),ㄐㄧㄤˇ-ㄐㄧㄣ)", value: "獎金", head: "ㄐㄧㄤˇ-ㄐㄧㄣ")
let c = (key: "((ㄒㄩㄝˊ-ㄕㄥ,學生),(ㄉㄜ˙,的),ㄈㄨˊ-ㄌㄧˋ)", value: "福利", head: "ㄈㄨˊ-ㄌㄧˋ")
let a = (key: "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)", value: "", head: "ㄍㄡˇ")
let b = (key: "((ㄆㄞˋ-ㄇㄥˊ,派蒙),(ㄉㄜ˙,的),ㄐㄧㄤˇ-ㄐㄧㄣ)", value: "伙食費", head: "ㄏㄨㄛˇ-ㄕˊ-ㄈㄟˋ")
let c = (key: "((ㄍㄨㄛˊ-ㄅㄥ,國崩),(ㄉㄜ˙,的),ㄇㄠˋ-ㄗ˙)", value: "帽子", head: "ㄇㄠˋ-ㄗ˙")
let d = (key: "((ㄌㄟˊ-ㄉㄧㄢˋ-ㄐㄧㄤ-ㄐㄩㄣ,雷電將軍),(ㄉㄜ˙,的),ㄐㄧㄠˇ-ㄔㄡˋ)", value: "腳臭", head: "ㄐㄧㄠˇ-ㄔㄡˋ")
let uom = vChewingLM.LMUserOverride(capacity: 2, decayConstant: Double(halfLife), dataURL: nullURL)
observe(who: uom, key: a.key, candidate: a.value, timestamp: nowTimeStamp)

View File

@ -16,6 +16,7 @@
%phase_auto_skip_endkey
%flag_disp_full_match
%flag_disp_partial_match
%keys_to_directly_commit !@#$%^&*()-_=+[{]}\|:'"<>?
%keyname begin
a 1-
b 5v
@ -47,16 +48,6 @@ z 1v
/ 0v
; 0-
, 8v
1
2
3
4
5
6
7
8
9
0
%keyname end
%quick begin
, ,火米精燈料鄰勞類營