DataCompiler // Enhance healthCheck().

This commit is contained in:
ShikiSuen 2022-12-30 13:49:41 +08:00
parent 6941c6a532
commit 13143cb175
1 changed file with 36 additions and 2 deletions

View File

@ -27,6 +27,16 @@ extension String {
} }
} }
// MARK: - String charComponents Extension
extension String {
  /// The string broken up into its individual `Character`s, each one wrapped
  /// in its own single-character `String`, in their original order.
  ///
  /// An empty string yields an empty array.
  public var charComponents: [String] {
    var pieces: [String] = []
    for character in self {
      pieces.append(String(character))
    }
    return pieces
  }
}
extension Array where Element == String.Element {
  /// The elements of this character array, each converted into a
  /// single-character `String`, in their original order.
  ///
  /// An empty array yields an empty array.
  public var charComponents: [String] {
    reduce(into: [String]()) { collected, character in
      collected.append(String(character))
    }
  }
}
// MARK: - StringView Ranges Extension (by Isaac Xen) // MARK: - StringView Ranges Extension (by Isaac Xen)
extension String { extension String {
@ -136,6 +146,9 @@ private let urlPlistBPMFReverseLookupCNS6: String = "./data-bpmf-reverse-lookup-
private var isReverseLookupDictionaryProcessed: Bool = false private var isReverseLookupDictionaryProcessed: Bool = false
// Lookup table from a phrase/character to the list of phone strings recorded for it.
// It starts empty and is assigned from the unencrypted reverse-lookup map built in
// rawDictForKanjis, right after the reverse-lookup plist has been written.
// healthCheck() waits until this is non-empty and then uses it to verify that each
// character of an entry matches the corresponding segment of the entry's key.
private var mapReverseLookupForCheck: [String: [String]] = [:]
// Values that healthCheck() exempts from the per-character phone check.
// Every non-empty phrase handled in rawDictForNonKanjis is inserted here.
// NOTE(review): the name presumably means "excepted" (exempted) characters — confirm.
private var exceptedChars: Set<String> = .init()
// MARK: - // MARK: -
func rawDictForPhrases(isCHS: Bool) -> [Unigram] { func rawDictForPhrases(isCHS: Bool) -> [Unigram] {
@ -243,6 +256,7 @@ func rawDictForKanjis(isCHS: Bool) -> [Unigram] {
NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String]) NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData = "" var varLineData = ""
var mapReverseLookup: [String: [Data]] = [:] var mapReverseLookup: [String: [Data]] = [:]
var mapReverseLookupUnencrypted: [String: [String]] = [:]
for lineData in arrData { for lineData in arrData {
// 1,2,4 1,3,4 // 1,2,4 1,3,4
let varLineDataPre = lineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1) let varLineDataPre = lineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1)
@ -283,6 +297,7 @@ func rawDictForKanjis(isCHS: Bool) -> [Unigram] {
if phrase != "" { // if phrase != "" { //
if !isReverseLookupDictionaryProcessed { if !isReverseLookupDictionaryProcessed {
mapReverseLookup[phrase, default: []].append(cnvPhonabetToASCII(phone).data(using: .utf8)!) mapReverseLookup[phrase, default: []].append(cnvPhonabetToASCII(phone).data(using: .utf8)!)
mapReverseLookupUnencrypted[phrase, default: []].append(phone)
} }
arrUnigramRAW += [ arrUnigramRAW += [
Unigram( Unigram(
@ -297,6 +312,7 @@ func rawDictForKanjis(isCHS: Bool) -> [Unigram] {
isReverseLookupDictionaryProcessed = true isReverseLookupDictionaryProcessed = true
try PropertyListSerialization.data(fromPropertyList: mapReverseLookup, format: .binary, options: 0).write( try PropertyListSerialization.data(fromPropertyList: mapReverseLookup, format: .binary, options: 0).write(
to: URL(fileURLWithPath: urlPlistBPMFReverseLookup)) to: URL(fileURLWithPath: urlPlistBPMFReverseLookup))
mapReverseLookupForCheck = mapReverseLookupUnencrypted
} catch { } catch {
NSLog(" - Core Reverse Lookup Data Generation Failed.") NSLog(" - Core Reverse Lookup Data Generation Failed.")
} }
@ -368,6 +384,7 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Unigram] {
} }
} }
if phrase != "" { // if phrase != "" { //
exceptedChars.insert(phrase)
arrUnigramRAW += [ arrUnigramRAW += [
Unigram( Unigram(
key: phone, value: phrase, score: 0.0, key: phone, value: phrase, score: 0.0,
@ -664,6 +681,7 @@ main()
// MARK: - // MARK: -
func healthCheck(_ data: [Unigram]) -> String { func healthCheck(_ data: [Unigram]) -> String {
while mapReverseLookupForCheck.isEmpty { sleep(1) }
var result = "" var result = ""
var unigramMonoChar = [String: Unigram]() var unigramMonoChar = [String: Unigram]()
var valueToScore = [String: Double]() var valueToScore = [String: Double]()
@ -691,15 +709,31 @@ func healthCheck(_ data: [Unigram]) -> String {
var competants = [Unigram]() var competants = [Unigram]()
var tscore: Double = 0 var tscore: Double = 0
var bad = false var bad = false
for x in neta.key.split(separator: "-") { let checkPerCharMachingStatus: Bool = neta.key.split(separator: "-").count == neta.value.count
outerMatchCheck: for (i, x) in neta.key.split(separator: "-").enumerated() {
if !unigramMonoChar.keys.contains(String(x)) { if !unigramMonoChar.keys.contains(String(x)) {
bad = true bad = true
break break outerMatchCheck
}
innerMatchCheck: if checkPerCharMachingStatus {
let char = neta.value.charComponents[i]
if exceptedChars.contains(char) { break innerMatchCheck }
guard let queriedPhones = mapReverseLookupForCheck[char] else {
bad = true
break outerMatchCheck
}
for queriedPhone in queriedPhones {
if queriedPhone == x.description { break innerMatchCheck }
}
bad = true
break outerMatchCheck
} }
guard let u = unigramMonoChar[String(x)] else { continue } guard let u = unigramMonoChar[String(x)] else { continue }
tscore += u.score tscore += u.score
competants.append(u) competants.append(u)
} }
if bad { if bad {
faulty.append(neta) faulty.append(neta)
continue continue