LMCassette // Refactor && Fix .clear().

This commit is contained in:
ShikiSuen 2023-12-31 01:21:22 +08:00
parent 4317c9c653
commit 1c92ab8edf
4 changed files with 278 additions and 328 deletions

View File

@ -40,21 +40,24 @@ public extension vChewingLM {
public private(set) var areCandidateKeysShiftHeld: Bool = false public private(set) var areCandidateKeysShiftHeld: Bool = false
public private(set) var supplyQuickResults: Bool = false public private(set) var supplyQuickResults: Bool = false
public private(set) var supplyPartiallyMatchedResults: Bool = false public private(set) var supplyPartiallyMatchedResults: Bool = false
/// 西 - NORM
/// 西
private static let fscale = 2.7
private var norm = 0.0 private var norm = 0.0
}
}
public extension vChewingLM.LMCassette {
/// 西 - fscale
private static let fscale = 2.7
/// ///
public var wildcard: String { wildcardKey.isEmpty ? "" : wildcardKey } var wildcard: String { wildcardKey.isEmpty ? "" : wildcardKey }
/// charDef /// charDef
public var count: Int { charDefMap.count } var count: Int { charDefMap.count }
/// ///
public var isLoaded: Bool { !charDefMap.isEmpty } var isLoaded: Bool { !charDefMap.isEmpty }
/// 使 /// 使
public var allowedKeys: [String] { Array(keyNameMap.keys + [" "]).deduplicated } var allowedKeys: [String] { Array(keyNameMap.keys + [" "]).deduplicated }
/// ///
public func convertKeyToDisplay(char: String) -> String { func convertKeyToDisplay(char: String) -> String {
keyNameMap[char] ?? char keyNameMap[char] ?? char
} }
@ -76,87 +79,92 @@ public extension vChewingLM {
/// ///
/// - Parameter path: /// - Parameter path:
/// - Returns: /// - Returns:
@discardableResult public mutating func open(_ path: String) -> Bool { @discardableResult mutating func open(_ path: String) -> Bool {
if isLoaded { return false } if isLoaded { return false }
let oldPath = filePath let oldPath = filePath
filePath = nil filePath = nil
if FileManager.default.fileExists(atPath: path) { if FileManager.default.fileExists(atPath: path) {
do { do {
guard let fileHandle = FileHandle(forReadingAtPath: path) else { guard let fileHandle = FileHandle(forReadingAtPath: path) else {
throw FileErrors.fileHandleError("") throw vChewingLM.FileErrors.fileHandleError("")
} }
let lineReader = try LineReader(file: fileHandle) let lineReader = try LineReader(file: fileHandle)
var theMaxKeyLength = 1 var theMaxKeyLength = 1
var loadingKeys = false var loadingKeys = false
var loadingQuickSets = false var loadingQuickSets = false {
var loadingCharDefinitions = false willSet {
var loadingSymbolDefinitions = false supplyQuickResults = true
if !newValue, quickDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingCharDefinitions = false {
willSet {
if !newValue, charDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingSymbolDefinitions = false {
willSet {
if !newValue, symbolDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
var loadingOctagramData = false var loadingOctagramData = false
var keysUsedInCharDef: Set<String> = .init() var keysUsedInCharDef: Set<String> = .init()
for strLine in lineReader { for strLine in lineReader {
if strLine.starts(with: "%keyname") { let isTabDelimiting = strLine.contains("\t")
if !loadingKeys, strLine.contains("begin") { loadingKeys = true } let cells = isTabDelimiting ? strLine.split(separator: "\t") : strLine.split(separator: " ")
if loadingKeys, strLine.contains("end") { loadingKeys = false } guard cells.count >= 1 else { continue }
} let strFirstCell = cells[0].trimmingCharacters(in: .newlines)
let strSecondCell = cells.count >= 2 ? cells[1].trimmingCharacters(in: .newlines) : nil
//
if strLine.first == "%", strFirstCell != "%" {
// %flag_disp_partial_match // %flag_disp_partial_match
if strLine == "%flag_disp_partial_match" { if strLine == "%flag_disp_partial_match" {
supplyPartiallyMatchedResults = true supplyPartiallyMatchedResults = true
supplyQuickResults = true supplyQuickResults = true
} }
// %quick guard let strSecondCell = strSecondCell else { continue }
if strLine.starts(with: "%quick") { processTags: switch strFirstCell {
supplyQuickResults = true case "%keyname" where strSecondCell == "begin": loadingKeys = true
if !loadingQuickSets, strLine.contains("begin") { case "%keyname" where strSecondCell == "end": loadingKeys = false
loadingQuickSets = true case "%quick" where strSecondCell == "begin": loadingQuickSets = true
case "%quick" where strSecondCell == "end": loadingQuickSets = false
case "%chardef" where strSecondCell == "begin": loadingCharDefinitions = true
case "%chardef" where strSecondCell == "end": loadingCharDefinitions = false
case "%symboldef" where strSecondCell == "begin": loadingSymbolDefinitions = true
case "%symboldef" where strSecondCell == "end": loadingSymbolDefinitions = false
case "%octagram" where strSecondCell == "begin": loadingOctagramData = true
case "%octagram" where strSecondCell == "end": loadingOctagramData = false
case "%ename" where nameENG.isEmpty:
parseSubCells: for neta in strSecondCell.components(separatedBy: ";") {
let subNetaGroup = neta.components(separatedBy: ":")
guard subNetaGroup.count == 2, subNetaGroup[1].contains("en") else { continue }
nameENG = String(subNetaGroup[0])
break parseSubCells
} }
if loadingQuickSets, strLine.contains("end") { guard nameENG.isEmpty else { break processTags }
loadingQuickSets = false nameENG = strSecondCell
if quickDefMap.keys.contains(wildcardKey) { wildcardKey = "" } case "%intlname" where nameIntl.isEmpty: nameIntl = strSecondCell.replacingOccurrences(of: "_", with: " ")
case "%cname" where nameCJK.isEmpty: nameCJK = strSecondCell
case "%sname" where nameShort.isEmpty: nameShort = strSecondCell
case "%nullcandidate" where nullCandidate.isEmpty: nullCandidate = strSecondCell
case "%selkey" where selectionKeys.isEmpty: selectionKeys = strSecondCell.map(\.description).deduplicated.joined()
case "%endkey" where endKeys.isEmpty: endKeys = strSecondCell.map(\.description).deduplicated
case "%wildcardkey" where wildcardKey.isEmpty: wildcardKey = strSecondCell.first?.description ?? ""
case "%keys_to_directly_commit" where keysToDirectlyCommit.isEmpty: keysToDirectlyCommit = strSecondCell
default: break processTags
} }
continue
} }
// %chardef
if strLine.starts(with: "%chardef") { //
if !loadingCharDefinitions, strLine.contains("begin") { guard let strSecondCell = strSecondCell else { continue }
loadingCharDefinitions = true if loadingKeys {
} keyNameMap[strFirstCell] = strSecondCell.trimmingCharacters(in: .newlines)
if loadingCharDefinitions, strLine.contains("end") { } else if loadingQuickSets {
loadingCharDefinitions = false
if charDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
// %symboldef
if strLine.starts(with: "%symboldef") {
if !loadingSymbolDefinitions, strLine.contains("begin") {
loadingSymbolDefinitions = true
}
if loadingSymbolDefinitions, strLine.contains("end") {
loadingSymbolDefinitions = false
if symbolDefMap.keys.contains(wildcardKey) { wildcardKey = "" }
}
}
// %octagram
if strLine.starts(with: "%octagram") {
if !loadingOctagramData, strLine.contains("begin") {
loadingOctagramData = true
}
if loadingOctagramData, strLine.contains("end") {
loadingOctagramData = false
}
}
// Start data parsing.
let cells: [String.SubSequence] =
strLine.contains("\t") ? strLine.split(separator: "\t") : strLine.split(separator: " ")
guard cells.count >= 2 else { continue }
let strFirstCell = cells[0].trimmingCharacters(in: .newlines)
let strSecondCell = cells[1].trimmingCharacters(in: .newlines)
if loadingKeys, !cells[0].starts(with: "%keyname") {
keyNameMap[strFirstCell] = cells[1].trimmingCharacters(in: .newlines)
} else if loadingQuickSets, !strLine.starts(with: "%quick") {
theMaxKeyLength = max(theMaxKeyLength, cells[0].count) theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
quickDefMap[strFirstCell, default: .init()].append(strSecondCell) quickDefMap[strFirstCell, default: .init()].append(strSecondCell)
} else if loadingCharDefinitions, !loadingSymbolDefinitions, } else if loadingCharDefinitions, !loadingSymbolDefinitions {
!strLine.starts(with: "%chardef"), !strLine.starts(with: "%symboldef")
{
theMaxKeyLength = max(theMaxKeyLength, cells[0].count) theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
charDefMap[strFirstCell, default: []].append(strSecondCell) charDefMap[strFirstCell, default: []].append(strSecondCell)
if strFirstCell.count > 1 { if strFirstCell.count > 1 {
@ -170,12 +178,12 @@ public extension vChewingLM {
keyComps.removeLast() keyComps.removeLast()
charDefWildcardMap[keyComps.joined() + wildcard, default: []].append(strSecondCell) charDefWildcardMap[keyComps.joined() + wildcard, default: []].append(strSecondCell)
} }
} else if loadingSymbolDefinitions, !strLine.starts(with: "%chardef"), !strLine.starts(with: "%symboldef") { } else if loadingSymbolDefinitions {
theMaxKeyLength = max(theMaxKeyLength, cells[0].count) theMaxKeyLength = max(theMaxKeyLength, cells[0].count)
symbolDefMap[strFirstCell, default: []].append(strSecondCell) symbolDefMap[strFirstCell, default: []].append(strSecondCell)
reverseLookupMap[strSecondCell, default: []].append(strFirstCell) reverseLookupMap[strSecondCell, default: []].append(strFirstCell)
} else if loadingOctagramData, !strLine.starts(with: "%octagram") { } else if loadingOctagramData {
guard let countValue = Int(cells[1]) else { continue } guard let countValue = Int(strSecondCell) else { continue }
switch cells.count { switch cells.count {
case 2: octagramMap[strFirstCell] = countValue case 2: octagramMap[strFirstCell] = countValue
case 3: octagramDividedMap[strFirstCell] = (countValue, cells[2].trimmingCharacters(in: .newlines)) case 3: octagramDividedMap[strFirstCell] = (countValue, cells[2].trimmingCharacters(in: .newlines))
@ -183,35 +191,6 @@ public extension vChewingLM {
} }
norm += Self.fscale ** (Double(cells[0].count) / 3.0 - 1.0) * Double(countValue) norm += Self.fscale ** (Double(cells[0].count) / 3.0 - 1.0) * Double(countValue)
} }
guard !loadingKeys, !loadingQuickSets, !loadingCharDefinitions, !loadingOctagramData else { continue }
if nameENG.isEmpty, strLine.starts(with: "%ename ") {
for neta in cells[1].components(separatedBy: ";") {
let subNetaGroup = neta.components(separatedBy: ":")
if subNetaGroup.count == 2, subNetaGroup[1].contains("en") {
nameENG = String(subNetaGroup[0])
break
}
}
if nameENG.isEmpty { nameENG = strSecondCell }
}
if nameIntl.isEmpty, strLine.starts(with: "%intlname ") {
nameIntl = strSecondCell.replacingOccurrences(of: "_", with: " ")
}
if nameCJK.isEmpty, strLine.starts(with: "%cname ") { nameCJK = strSecondCell }
if nameShort.isEmpty, strLine.starts(with: "%sname ") { nameShort = strSecondCell }
if nullCandidate.isEmpty, strLine.starts(with: "%nullcandidate ") { nullCandidate = strSecondCell }
if selectionKeys.isEmpty, strLine.starts(with: "%selkey ") {
selectionKeys = cells[1].map(\.description).deduplicated.joined()
}
if endKeys.isEmpty, strLine.starts(with: "%endkey ") {
endKeys = cells[1].map(\.description).deduplicated
}
if wildcardKey.isEmpty, strLine.starts(with: "%wildcardkey ") {
wildcardKey = cells[1].first?.description ?? ""
}
if keysToDirectlyCommit.isEmpty, strLine.starts(with: "%keys_to_directly_commit ") {
keysToDirectlyCommit = strSecondCell
}
} }
// Post process. // Post process.
if CandidateKey.validate(keys: selectionKeys) != nil { selectionKeys = "1234567890" } if CandidateKey.validate(keys: selectionKeys) != nil { selectionKeys = "1234567890" }
@ -232,28 +211,11 @@ public extension vChewingLM {
return false return false
} }
public mutating func clear() { mutating func clear() {
filePath = nil self = .init()
nullCandidate.removeAll()
keyNameMap.removeAll()
quickDefMap.removeAll()
charDefMap.removeAll()
charDefWildcardMap.removeAll()
nameShort.removeAll()
nameENG.removeAll()
nameCJK.removeAll()
selectionKeys.removeAll()
endKeys.removeAll()
reverseLookupMap.removeAll()
octagramMap.removeAll()
octagramDividedMap.removeAll()
wildcardKey.removeAll()
nameIntl.removeAll()
maxKeyLength = 1
norm = 0
} }
public func quickSetsFor(key: String) -> String? { func quickSetsFor(key: String) -> String? {
guard !key.isEmpty else { return nil } guard !key.isEmpty else { return nil }
var result = [String]() var result = [String]()
if let specifiedResult = quickDefMap[key], !specifiedResult.isEmpty { if let specifiedResult = quickDefMap[key], !specifiedResult.isEmpty {
@ -280,7 +242,7 @@ public extension vChewingLM {
/// ///
/// - parameters: /// - parameters:
/// - key: /// - key:
public func unigramsFor(key: String) -> [Megrez.Unigram] { func unigramsFor(key: String) -> [Megrez.Unigram] {
let arrRaw = charDefMap[key]?.deduplicated ?? [] let arrRaw = charDefMap[key]?.deduplicated ?? []
var arrRawWildcard: [String] = [] var arrRawWildcard: [String] = []
if let arrRawWildcardValues = charDefWildcardMap[key]?.deduplicated, if let arrRawWildcardValues = charDefWildcardMap[key]?.deduplicated,
@ -323,7 +285,7 @@ public extension vChewingLM {
/// ///
/// - parameters: /// - parameters:
/// - key: /// - key:
public func hasUnigramsFor(key: String) -> Bool { func hasUnigramsFor(key: String) -> Bool {
charDefMap[key] != nil charDefMap[key] != nil
|| (charDefWildcardMap[key] != nil && key.contains(wildcard) && key.first?.description != wildcard) || (charDefWildcardMap[key] != nil && key.contains(wildcard) && key.first?.description != wildcard)
} }
@ -348,7 +310,6 @@ public extension vChewingLM {
} }
return weight return weight
} }
}
} }
// MARK: - // MARK: -

View File

@ -47,7 +47,7 @@ final class LMCassetteTests: XCTestCase {
NSLog("LMCassette: Finished loading CIN. Entries: \(lmCassette.count)") NSLog("LMCassette: Finished loading CIN. Entries: \(lmCassette.count)")
XCTAssertFalse(lmCassette.quickDefMap.isEmpty) XCTAssertFalse(lmCassette.quickDefMap.isEmpty)
print(lmCassette.quickSetsFor(key: ",.") ?? "") print(lmCassette.quickSetsFor(key: ",.") ?? "")
XCTAssertEqual(lmCassette.keyNameMap.count, 41) XCTAssertEqual(lmCassette.keyNameMap.count, 31)
XCTAssertEqual(lmCassette.charDefMap.count, 29491) XCTAssertEqual(lmCassette.charDefMap.count, 29491)
XCTAssertEqual(lmCassette.charDefWildcardMap.count, 11946) XCTAssertEqual(lmCassette.charDefWildcardMap.count, 11946)
XCTAssertEqual(lmCassette.octagramMap.count, 0) XCTAssertEqual(lmCassette.octagramMap.count, 0)

View File

@ -23,9 +23,9 @@ final class LMUserOverrideTests: XCTestCase {
func testUOM_1_BasicOps() throws { func testUOM_1_BasicOps() throws {
let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL) let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL)
let key = "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)" let key = "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)"
let headReading = "ㄋㄧㄢˊ-ㄓㄨㄥ" let headReading = "ㄍㄡˇ"
let expectedSuggestion = "年終" let expectedSuggestion = ""
observe(who: uom, key: key, candidate: expectedSuggestion, timestamp: nowTimeStamp) observe(who: uom, key: key, candidate: expectedSuggestion, timestamp: nowTimeStamp)
var suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp, headReading: headReading) var suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", expectedSuggestion) XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", expectedSuggestion)
@ -46,10 +46,10 @@ final class LMUserOverrideTests: XCTestCase {
func testUOM_2_NewestAgainstRepeatedlyUsed() throws { func testUOM_2_NewestAgainstRepeatedlyUsed() throws {
let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL) let uom = vChewingLM.LMUserOverride(capacity: capacity, decayConstant: Double(halfLife), dataURL: nullURL)
let key = "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)" let key = "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)"
let headReading = "ㄋㄧㄢˊ-ㄓㄨㄥ" let headReading = "ㄍㄡˇ"
let valRepeatedlyUsed = "年終" // let valRepeatedlyUsed = "" //
let valNewest = "年中" // let valNewest = "" //
let stamps: [Double] = [0, 0.5, 2, 2.5, 4, 4.5, 5.3].map { nowTimeStamp + halfLife * $0 } let stamps: [Double] = [0, 0.5, 2, 2.5, 4, 4.5, 5.3].map { nowTimeStamp + halfLife * $0 }
stamps.forEach { stamp in stamps.forEach { stamp in
observe(who: uom, key: key, candidate: valRepeatedlyUsed, timestamp: stamp) observe(who: uom, key: key, candidate: valRepeatedlyUsed, timestamp: stamp)
@ -62,8 +62,6 @@ final class LMUserOverrideTests: XCTestCase {
} }
// //
observe(who: uom, key: key, candidate: valNewest, timestamp: nowTimeStamp + halfLife * 23.4) observe(who: uom, key: key, candidate: valNewest, timestamp: nowTimeStamp + halfLife * 23.4)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 23.6, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", valNewest)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 26, headReading: headReading) suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 26, headReading: headReading)
XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", valNewest) XCTAssertEqual(Set(suggested.candidates.map(\.1.value)).first ?? "", valNewest)
suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 50, headReading: headReading) suggested = uom.getSuggestion(key: key, timestamp: nowTimeStamp + halfLife * 50, headReading: headReading)
@ -72,9 +70,9 @@ final class LMUserOverrideTests: XCTestCase {
} }
func testUOM_3_LRUTable() throws { func testUOM_3_LRUTable() throws {
let a = (key: "((ㄍㄨㄥ-ㄙ,公司),(ㄉㄜ˙,的),ㄋㄧㄢˊ-ㄓㄨㄥ)", value: "年終", head: "ㄋㄧㄢˊ-ㄓㄨㄥ") let a = (key: "((ㄕㄣˊ-ㄌㄧˇ-ㄌㄧㄥˊ-ㄏㄨㄚˊ,神里綾華),(ㄉㄜ˙,的),ㄍㄡˇ)", value: "", head: "ㄍㄡˇ")
let b = (key: "((ㄑㄧˋ-ㄧㄝˋ,企業),(ㄉㄜ˙,的),ㄐㄧㄤˇ-ㄐㄧㄣ)", value: "獎金", head: "ㄐㄧㄤˇ-ㄐㄧㄣ") let b = (key: "((ㄆㄞˋ-ㄇㄥˊ,派蒙),(ㄉㄜ˙,的),ㄐㄧㄤˇ-ㄐㄧㄣ)", value: "伙食費", head: "ㄏㄨㄛˇ-ㄕˊ-ㄈㄟˋ")
let c = (key: "((ㄒㄩㄝˊ-ㄕㄥ,學生),(ㄉㄜ˙,的),ㄈㄨˊ-ㄌㄧˋ)", value: "福利", head: "ㄈㄨˊ-ㄌㄧˋ") let c = (key: "((ㄍㄨㄛˊ-ㄅㄥ,國崩),(ㄉㄜ˙,的),ㄇㄠˋ-ㄗ˙)", value: "帽子", head: "ㄇㄠˋ-ㄗ˙")
let d = (key: "((ㄌㄟˊ-ㄉㄧㄢˋ-ㄐㄧㄤ-ㄐㄩㄣ,雷電將軍),(ㄉㄜ˙,的),ㄐㄧㄠˇ-ㄔㄡˋ)", value: "腳臭", head: "ㄐㄧㄠˇ-ㄔㄡˋ") let d = (key: "((ㄌㄟˊ-ㄉㄧㄢˋ-ㄐㄧㄤ-ㄐㄩㄣ,雷電將軍),(ㄉㄜ˙,的),ㄐㄧㄠˇ-ㄔㄡˋ)", value: "腳臭", head: "ㄐㄧㄠˇ-ㄔㄡˋ")
let uom = vChewingLM.LMUserOverride(capacity: 2, decayConstant: Double(halfLife), dataURL: nullURL) let uom = vChewingLM.LMUserOverride(capacity: 2, decayConstant: Double(halfLife), dataURL: nullURL)
observe(who: uom, key: a.key, candidate: a.value, timestamp: nowTimeStamp) observe(who: uom, key: a.key, candidate: a.value, timestamp: nowTimeStamp)

View File

@ -16,6 +16,7 @@
%phase_auto_skip_endkey %phase_auto_skip_endkey
%flag_disp_full_match %flag_disp_full_match
%flag_disp_partial_match %flag_disp_partial_match
%keys_to_directly_commit !@#$%^&*()-_=+[{]}\|:'"<>?
%keyname begin %keyname begin
a 1- a 1-
b 5v b 5v
@ -47,16 +48,6 @@ z 1v
/ 0v / 0v
; 0- ; 0-
, 8v , 8v
1
2
3
4
5
6
7
8
9
0
%keyname end %keyname end
%quick begin %quick begin
, ,火米精燈料鄰勞類營 , ,火米精燈料鄰勞類營