Repo // Unseal the Hanin symbol menu data.
* Also make sure the Hanin symbol menu content is kept as-is.
This commit is contained in:
parent
df03e29b59
commit
7beb6d3cd2
|
@ -480,7 +480,7 @@ func fileOutput(isCHS: Bool) {
|
||||||
if neta.count >= 2 {
|
if neta.count >= 2 {
|
||||||
let theKey = String(neta[0])
|
let theKey = String(neta[0])
|
||||||
let theValue = String(neta[1])
|
let theValue = String(neta[1])
|
||||||
if !neta[0].isEmpty, !neta[1].isEmpty, line.first != "#", !theKey.contains("_punctuation_list") {
|
if !neta[0].isEmpty, !neta[1].isEmpty, line.first != "#" {
|
||||||
rangeMap[cnvPhonabetToASCII(theKey), default: []].append(theValue.data(using: .utf8)!)
|
rangeMap[cnvPhonabetToASCII(theKey), default: []].append(theValue.data(using: .utf8)!)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -346,6 +346,10 @@ public extension vChewingLM {
|
||||||
// 新增與日期、時間、星期有關的單元圖資料
|
// 新增與日期、時間、星期有關的單元圖資料
|
||||||
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
|
rawAllUnigrams.append(contentsOf: queryDateTimeUnigrams(with: keyChain))
|
||||||
|
|
||||||
|
if keyChain == "_punctuation_list" {
|
||||||
|
rawAllUnigrams.append(contentsOf: lmCore.getHaninSymbolMenuUnigrams())
|
||||||
|
}
|
||||||
|
|
||||||
// 提前處理語彙置換
|
// 提前處理語彙置換
|
||||||
if isPhraseReplacementEnabled {
|
if isPhraseReplacementEnabled {
|
||||||
for i in 0 ..< rawAllUnigrams.count {
|
for i in 0 ..< rawAllUnigrams.count {
|
||||||
|
|
|
@ -128,10 +128,31 @@ public extension vChewingLM {
|
||||||
vCLog(strDump)
|
vCLog(strDump)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the unigrams for the Hanin symbol menu from the records stored under the
/// `_punctuation_list` key of `dataMap`.
///
/// Each record is decoded as UTF-8, trimmed of newlines and split on spaces. The last
/// field of a record becomes the unigram value; the field before it (when present and
/// `shouldForceDefaultScore` is false) is parsed as the score, falling back to
/// `defaultScore` when it cannot be parsed. Positive scores are negated.
/// Records that contain no field at all are skipped rather than trapping on an
/// out-of-range subscript.
/// - Returns: One unigram per non-blank record, in the order the records are stored;
///   an empty array when no data exists for the key.
public func getHaninSymbolMenuUnigrams() -> [Megrez.Unigram] {
  let key = "_punctuation_list"
  var grams: [Megrez.Unigram] = []
  guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return grams }
  for netaSet in arrRangeRecords {
    let strNetaSet = String(decoding: netaSet, as: UTF8.self)
    // Fields are reversed so that the value comes first and the optional score second.
    let neta = Array(strNetaSet.trimmingCharacters(in: .newlines).split(separator: " ").reversed())
    // `split` omits empty subsequences, so a blank record yields no fields at all.
    guard let valueField = neta.first else { continue }
    let theValue: String = .init(valueField)
    var theScore = defaultScore
    if neta.count >= 2, !shouldForceDefaultScore {
      theScore = .init(String(neta[1])) ?? defaultScore
    }
    if theScore > 0 {
      theScore *= -1 // Handles records where the minus sign was accidentally left out.
    }
    grams.append(Megrez.Unigram(value: theValue, score: theScore))
  }
  return grams
}
|
||||||
|
|
||||||
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。
|
/// 根據給定的讀音索引鍵,來獲取資料庫辭典內的對應資料陣列的 UTF8 資料、就地分析、生成單元圖陣列。
|
||||||
/// - parameters:
|
/// - parameters:
|
||||||
/// - key: 讀音索引鍵。
|
/// - key: 讀音索引鍵。
|
||||||
public func unigramsFor(key: String) -> [Megrez.Unigram] {
|
public func unigramsFor(key: String) -> [Megrez.Unigram] {
|
||||||
|
if key == "_punctuation_list" { return [] }
|
||||||
var grams: [Megrez.Unigram] = []
|
var grams: [Megrez.Unigram] = []
|
||||||
var gramsHW: [Megrez.Unigram] = []
|
var gramsHW: [Megrez.Unigram] = []
|
||||||
guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return grams }
|
guard let arrRangeRecords: [Data] = dataMap[cnvPhonabetToASCII(key)] else { return grams }
|
||||||
|
@ -148,10 +169,9 @@ public extension vChewingLM {
|
||||||
}
|
}
|
||||||
grams.append(Megrez.Unigram(value: theValue, score: theScore))
|
grams.append(Megrez.Unigram(value: theValue, score: theScore))
|
||||||
if !key.contains("_punctuation") { continue }
|
if !key.contains("_punctuation") { continue }
|
||||||
if let halfValue = theValue.applyingTransform(.fullwidthToHalfwidth, reverse: false) {
|
let halfValue = theValue.applyingTransformFW2HW(reverse: false)
|
||||||
if halfValue != theValue {
|
if halfValue != theValue {
|
||||||
gramsHW.append(Megrez.Unigram(value: halfValue, score: theScore))
|
gramsHW.append(Megrez.Unigram(value: halfValue, score: theScore))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
grams.append(contentsOf: gramsHW)
|
grams.append(contentsOf: gramsHW)
|
||||||
|
|
Loading…
Reference in New Issue