LM // Swiftify: LMCore.
This commit is contained in:
parent
8e18bda5d1
commit
1b4b4149a0
|
@ -0,0 +1,168 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// 威注音重新設計原廠詞庫語言模組。不排序,但使用 Swift 內建的 String 處理。
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
|
||||
public class LMCore {
|
||||
var keyValueScoreMap: [String: [Megrez.Unigram]] = [:]
|
||||
var theData: String = ""
|
||||
var shouldReverse: Bool = false
|
||||
var allowConsolidation: Bool = false
|
||||
var defaultScore: Double = 0
|
||||
var shouldForceDefaultScore: Bool = false
|
||||
|
||||
public init(
|
||||
reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0,
|
||||
forceDefaultScore: Bool = false
|
||||
) {
|
||||
keyValueScoreMap = [:]
|
||||
theData = ""
|
||||
allowConsolidation = consolidate
|
||||
shouldReverse = reverse
|
||||
defaultScore = scoreDefault
|
||||
shouldForceDefaultScore = forceDefaultScore
|
||||
}
|
||||
|
||||
deinit {
|
||||
if isLoaded() {
|
||||
close()
|
||||
}
|
||||
}
|
||||
|
||||
public func isLoaded() -> Bool {
|
||||
!keyValueScoreMap.isEmpty
|
||||
}
|
||||
|
||||
@discardableResult public func open(_ path: String) -> Bool {
|
||||
if isLoaded() {
|
||||
return false
|
||||
}
|
||||
|
||||
if allowConsolidation {
|
||||
if !LMConsolidator.fixEOF(path: path) {
|
||||
return false
|
||||
}
|
||||
if !LMConsolidator.consolidate(path: path, pragma: true) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
theData = try String(contentsOfFile: path, encoding: .utf8)
|
||||
} catch {
|
||||
IME.prtDebugIntel("\(error)")
|
||||
IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
|
||||
return false
|
||||
}
|
||||
|
||||
let length = theData.count
|
||||
guard length > 0 else {
|
||||
return false
|
||||
}
|
||||
|
||||
let arrData = theData.components(separatedBy: "\n")
|
||||
for (lineID, lineContent) in arrData.enumerated() {
|
||||
if !lineContent.hasPrefix("#") {
|
||||
let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ")
|
||||
if lineContent.components(separatedBy: " ").count < 2 {
|
||||
if arrData.last != "" {
|
||||
IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
|
||||
}
|
||||
continue
|
||||
}
|
||||
var currentUnigram = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: defaultScore)
|
||||
var columnOne = ""
|
||||
var columnTwo = ""
|
||||
for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() {
|
||||
switch unitID {
|
||||
case 0:
|
||||
columnOne = unitContent
|
||||
case 1:
|
||||
columnTwo = unitContent
|
||||
case 2:
|
||||
if !shouldForceDefaultScore {
|
||||
if let unitContentConverted = Double(unitContent) {
|
||||
currentUnigram.score = unitContentConverted
|
||||
} else {
|
||||
IME.prtDebugIntel("Line #\(lineID) Score Data Wrecked: \(lineContent)")
|
||||
}
|
||||
}
|
||||
default: break
|
||||
}
|
||||
}
|
||||
let kvPair =
|
||||
shouldReverse
|
||||
? Megrez.KeyValuePair(key: columnTwo, value: columnOne)
|
||||
: Megrez.KeyValuePair(key: columnOne, value: columnTwo)
|
||||
currentUnigram.keyValue = kvPair
|
||||
let key = shouldReverse ? columnTwo : columnOne
|
||||
keyValueScoreMap[key, default: []].append(currentUnigram)
|
||||
}
|
||||
}
|
||||
IME.prtDebugIntel("\(keyValueScoreMap.count) entries of data loaded from: \(path)")
|
||||
theData = ""
|
||||
return true
|
||||
}
|
||||
|
||||
public func close() {
|
||||
if isLoaded() {
|
||||
keyValueScoreMap.removeAll()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Advanced features
|
||||
|
||||
public func dump() {
|
||||
var strDump = ""
|
||||
for entry in keyValueScoreMap {
|
||||
let rows: [Megrez.Unigram] = entry.1
|
||||
for row in rows {
|
||||
let addline = row.keyValue.key + " " + row.keyValue.value + " " + String(row.score) + "\n"
|
||||
strDump += addline
|
||||
}
|
||||
}
|
||||
IME.prtDebugIntel(strDump)
|
||||
}
|
||||
|
||||
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
// 這裡用了點廢話處理,不然函數構建體會被 Swift 格式整理工具給毀掉。
|
||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
||||
}
|
||||
|
||||
open func unigramsFor(key: String) -> [Megrez.Unigram] {
|
||||
keyValueScoreMap[key] ?? [Megrez.Unigram]()
|
||||
}
|
||||
|
||||
open func hasUnigramsFor(key: String) -> Bool {
|
||||
if let arrEntry = keyValueScoreMap[key] {
|
||||
return !arrEntry.isEmpty
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue