DataCompiler // Read new contents from raw folder.

This commit is contained in:
ShikiSuen 2023-05-06 17:17:17 +08:00
parent 53ad30ebdd
commit 08a069afde
1 changed file with 17 additions and 32 deletions

View File

@ -117,15 +117,8 @@ func cnvPhonabetToASCII(_ incoming: String) -> String {
private let urlCurrentFolder = URL(fileURLWithPath: FileManager.default.currentDirectoryPath) private let urlCurrentFolder = URL(fileURLWithPath: FileManager.default.currentDirectoryPath)
private let urlCHSforCustom: String = "./components/chs/phrases-custom-chs.txt" private let urlCHSRoot: String = "./components/chs/"
private let urlCHSforTABE: String = "./components/chs/phrases-tabe-chs.txt" private let urlCHTRoot: String = "./components/cht/"
private let urlCHSforMOE: String = "./components/chs/phrases-moe-chs.txt"
private let urlCHSforVCHEW: String = "./components/chs/phrases-vchewing-chs.txt"
private let urlCHTforCustom: String = "./components/cht/phrases-custom-cht.txt"
private let urlCHTforTABE: String = "./components/cht/phrases-tabe-cht.txt"
private let urlCHTforMOE: String = "./components/cht/phrases-moe-cht.txt"
private let urlCHTforVCHEW: String = "./components/cht/phrases-vchewing-cht.txt"
private let urlKanjiCore: String = "./components/common/char-kanji-core.txt" private let urlKanjiCore: String = "./components/common/char-kanji-core.txt"
private let urlMiscBPMF: String = "./components/common/char-misc-bpmf.txt" private let urlMiscBPMF: String = "./components/common/char-misc-bpmf.txt"
@ -161,28 +154,22 @@ private var exceptedChars: Set<String> = .init()
func rawDictForPhrases(isCHS: Bool) -> [Unigram] { func rawDictForPhrases(isCHS: Bool) -> [Unigram] {
var arrUnigramRAW: [Unigram] = [] var arrUnigramRAW: [Unigram] = []
var strRAWOrig: [String] = [] var strRAWOrigDict: [String: String] = [:]
let urlCustom: String = isCHS ? urlCHSforCustom : urlCHTforCustom let urlFolderRoot: String = isCHS ? urlCHSRoot : urlCHTRoot
let urlTABE: String = isCHS ? urlCHSforTABE : urlCHTforTABE
let urlMOE: String = isCHS ? urlCHSforMOE : urlCHTforMOE
let urlVCHEW: String = isCHS ? urlCHSforVCHEW : urlCHTforVCHEW
let i18n: String = isCHS ? "簡體中文" : "繁體中文" let i18n: String = isCHS ? "簡體中文" : "繁體中文"
// //
do { do {
let str1 = try String(contentsOfFile: urlCustom, encoding: .utf8) try FileManager.default.contentsOfDirectory(atPath: urlFolderRoot).forEach { thePath in
let str2 = try String(contentsOfFile: urlTABE, encoding: .utf8) guard thePath.contains("phrases-") else { return }
let str3 = try String(contentsOfFile: urlMOE, encoding: .utf8) let str = try String(contentsOfFile: urlFolderRoot + thePath, encoding: .utf8)
let str4 = try String(contentsOfFile: urlVCHEW, encoding: .utf8) strRAWOrigDict[thePath] = str
strRAWOrig.append(str1) }
strRAWOrig.append(str2)
strRAWOrig.append(str3)
strRAWOrig.append(str4)
} catch { } catch {
NSLog(" - Exception happened when reading raw phrases data.") NSLog(" - Exception happened when reading raw phrases data.")
return [] return []
} }
for i in 0 ..< strRAWOrig.count { for key in strRAWOrigDict.keys {
var strRAW = strRAWOrig[i] guard var strRAW = strRAWOrigDict[key] else { continue }
// //
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // macOS strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // macOS
// CJKWhiteSpace (\x{3000}) to ASCII Space // CJKWhiteSpace (\x{3000}) to ASCII Space
@ -193,16 +180,14 @@ func rawDictForPhrases(isCHS: Bool) -> [Unigram] {
strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") // strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF, strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32 strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
strRAWOrig[i] = strRAW strRAWOrigDict[key] = strRAW
let currentCategory: Unigram.UnigramCategory = { let currentCategory: Unigram.UnigramCategory = {
switch i { if key.contains("-custom-") { return .custom }
case 0: return .custom if key.contains("-tabe-") { return .tabe }
case 1: return .tabe if key.contains("-moe-") { return .moe }
case 2: return .moe if key.contains("-vchewing-") { return .macv }
case 3: return .macv return .custom
default: return .custom
}
}() }()
var lineData = "" var lineData = ""
for lineNeta in strRAW.split(separator: "\n") { for lineNeta in strRAW.split(separator: "\n") {