dataCompiler // Further optimization of regex processing.

This commit is contained in:
ShikiSuen 2022-03-10 10:36:21 +08:00
parent c27608febe
commit 2233abc6ff
1 changed file with 12 additions and 38 deletions

View File

@ -123,24 +123,14 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space
// ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
//
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = ""
for lineData in arrData {
varLineData = lineData
//
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
//
let arrLineData = varLineData.components(separatedBy: " ")
let arrLineData = lineData.components(separatedBy: " ")
var varLineDataProcessed: String = ""
var count = 0
for currentCell in arrLineData {
@ -197,25 +187,16 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space
// ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
//
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = ""
for lineData in arrData {
varLineData = lineData
//
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
// 1,2,4 1,3,4
let varLineDataPre = varLineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1).joined(separator: "\t")
let varLineDataPost = varLineData.components(separatedBy: " ").suffix(isCHS ? 1 : 2).joined(separator: "\t")
let varLineDataPre = lineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1).joined(separator: "\t")
let varLineDataPost = lineData.components(separatedBy: " ").suffix(isCHS ? 1 : 2).joined(separator: "\t")
varLineData = varLineDataPre + "\t" + varLineDataPost
let arrLineData = varLineData.components(separatedBy: " ")
var varLineDataProcessed: String = ""
@ -276,22 +257,15 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space
// ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
//
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = ""
for lineData in arrData {
varLineData = lineData
//
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
varLineData = varLineData.components(separatedBy: " ").prefix(3).joined(separator: "\t") //
let arrLineData = varLineData.components(separatedBy: " ")
var varLineDataProcessed: String = ""