Repo // Unseal the Hanin symbol menu data.

* Also make sure the Hanin symbol menu content is kept as-is.
This commit is contained in:
ShikiSuen 2023-02-10 20:00:57 +08:00
parent df03e29b59
commit 7beb6d3cd2
3 changed files with 29 additions and 5 deletions

View File

@ -480,7 +480,7 @@ func fileOutput(isCHS: Bool) {
if neta.count >= 2 { if neta.count >= 2 {
let theKey = String(neta[0]) let theKey = String(neta[0])
let theValue = String(neta[1]) let theValue = String(neta[1])
if !neta[0].isEmpty, !neta[1].isEmpty, line.first != "#", !theKey.contains("_punctuation_list") { if !neta[0].isEmpty, !neta[1].isEmpty, line.first != "#" {
rangeMap[cnvPhonabetToASCII(theKey), default: []].append(theValue.data(using: .utf8)!) rangeMap[cnvPhonabetToASCII(theKey), default: []].append(theValue.data(using: .utf8)!)
} }
} }

View File

@ -346,6 +346,10 @@ public extension vChewingLM {
// //
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain)) rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
if keyChain == "_punctuation_list" {
rawAllUnigrams.append(contentsOf: lmCore.getHaninSymbolMenuUnigrams())
}
// //
if isPhraseReplacementEnabled { if isPhraseReplacementEnabled {
for i in 0 ..< rawAllUnigrams.count { for i in 0 ..< rawAllUnigrams.count {

View File

@ -128,10 +128,31 @@ public extension vChewingLM {
vCLog(strDump) vCLog(strDump)
} }
/// Builds the unigrams for the Hanin symbol menu from the records stored under `_punctuation_list`.
///
/// Each record is decoded as UTF-8, stripped of surrounding newlines, split on spaces and
/// then reversed, so the last token of a record becomes the value and the second-to-last
/// token (when present) is parsed as the score. Records that contain no tokens are skipped.
/// - Returns: One unigram per non-empty record, in stored order; an empty array when no
///   records exist under the key.
public func getHaninSymbolMenuUnigrams() -> [Megrez.Unigram] {
  let key = "_punctuation_list"
  guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return [] }
  var grams: [Megrez.Unigram] = []
  grams.reserveCapacity(arrRangeRecords.count)
  for netaSet in arrRangeRecords {
    let strNetaSet = String(decoding: netaSet, as: UTF8.self)
    let neta = Array(strNetaSet.trimmingCharacters(in: .newlines).split(separator: " ").reversed())
    // `split` drops empty subsequences, so a blank record yields no tokens;
    // indexing `neta[0]` would trap in that case, hence the guard.
    guard let valueToken = neta.first else { continue }
    let theValue = String(valueToken)
    var theScore = defaultScore
    if neta.count >= 2, !shouldForceDefaultScore {
      // Fall back to the default score when the token is not a parsable number.
      theScore = .init(String(neta[1])) ?? defaultScore
    }
    if theScore > 0 {
      theScore *= -1 // Flip the sign so that the resulting score is never positive.
    }
    grams.append(Megrez.Unigram(value: theValue, score: theScore))
  }
  return grams
}
/// UTF8 /// UTF8
/// - parameters: /// - parameters:
/// - key: /// - key:
public func unigramsFor(key: String) -> [Megrez.Unigram] { public func unigramsFor(key: String) -> [Megrez.Unigram] {
if key == "_punctuation_list" { return [] }
var grams: [Megrez.Unigram] = [] var grams: [Megrez.Unigram] = []
var gramsHW: [Megrez.Unigram] = [] var gramsHW: [Megrez.Unigram] = []
guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return grams } guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return grams }
@ -148,10 +169,9 @@ public extension vChewingLM {
} }
grams.append(Megrez.Unigram(value: theValue, score: theScore)) grams.append(Megrez.Unigram(value: theValue, score: theScore))
if !key.contains("_punctuation") { continue } if !key.contains("_punctuation") { continue }
if let halfValue = theValue.applyingTransform(.fullwidthToHalfwidth, reverse: false) { let halfValue = theValue.applyingTransformFW2HW(reverse: false)
if halfValue != theValue { if halfValue != theValue {
gramsHW.append(Megrez.Unigram(value: halfValue, score: theScore)) gramsHW.append(Megrez.Unigram(value: halfValue, score: theScore))
}
} }
} }
grams.append(contentsOf: gramsHW) grams.append(contentsOf: gramsHW)