LMCoreNS // Auto-generate candidates of half-width punctuations.

This commit is contained in:
ShikiSuen 2022-11-26 05:45:49 +08:00
parent 7e57eb74c1
commit cdc8a3a113
1 changed file with 19 additions and 12 deletions

View File

@@ -107,21 +107,28 @@ extension vChewingLM {
/// - key: /// - key:
public func unigramsFor(key: String) -> [Megrez.Unigram] { public func unigramsFor(key: String) -> [Megrez.Unigram] {
var grams: [Megrez.Unigram] = [] var grams: [Megrez.Unigram] = []
if let arrRangeRecords: [Data] = rangeMap[cnvPhonabetToASCII(key)] { var gramsHW: [Megrez.Unigram] = []
for netaSet in arrRangeRecords { guard let arrRangeRecords: [Data] = rangeMap[cnvPhonabetToASCII(key)] else { return grams }
let strNetaSet = String(decoding: netaSet, as: UTF8.self) for netaSet in arrRangeRecords {
let neta = Array(strNetaSet.trimmingCharacters(in: .newlines).split(separator: " ").reversed()) let strNetaSet = String(decoding: netaSet, as: UTF8.self)
let theValue: String = .init(neta[0]) let neta = Array(strNetaSet.trimmingCharacters(in: .newlines).split(separator: " ").reversed())
var theScore = defaultScore let theValue: String = .init(neta[0])
if neta.count >= 2, !shouldForceDefaultScore { var theScore = defaultScore
theScore = .init(String(neta[1])) ?? defaultScore if neta.count >= 2, !shouldForceDefaultScore {
theScore = .init(String(neta[1])) ?? defaultScore
}
if theScore > 0 {
theScore *= -1 //
}
grams.append(Megrez.Unigram(value: theValue, score: theScore))
if !key.contains("_punctuation") { continue }
if let halfValue = theValue.applyingTransform(.fullwidthToHalfwidth, reverse: false) {
if halfValue != theValue {
gramsHW.append(Megrez.Unigram(value: halfValue, score: theScore))
} }
if theScore > 0 {
theScore *= -1 //
}
grams.append(Megrez.Unigram(value: theValue, score: theScore))
} }
} }
grams.append(contentsOf: gramsHW)
return grams return grams
} }