dataCompiler // Further optimization of regex processing.

This commit is contained in:
ShikiSuen 2022-03-10 10:36:21 +08:00
parent a8caee8e8d
commit b6ed837eac
1 changed file with 12 additions and 38 deletions

View File

@ -123,24 +123,14 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space // Tab to ASCII Space
// ASCII // ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
if strRAW.prefix(1) == " " { // strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
// //
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String]) let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = ""
for lineData in arrData { for lineData in arrData {
varLineData = lineData
//
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
// //
let arrLineData = varLineData.components(separatedBy: " ") let arrLineData = lineData.components(separatedBy: " ")
var varLineDataProcessed: String = "" var varLineDataProcessed: String = ""
var count = 0 var count = 0
for currentCell in arrLineData { for currentCell in arrLineData {
@ -197,25 +187,16 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space // Tab to ASCII Space
// ASCII // ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
if strRAW.prefix(1) == " " { // strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
// //
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String]) let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = "" var varLineData: String = ""
for lineData in arrData { for lineData in arrData {
varLineData = lineData
//
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
// 1,2,4 1,3,4 // 1,2,4 1,3,4
let varLineDataPre = varLineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1).joined(separator: "\t") let varLineDataPre = lineData.components(separatedBy: " ").prefix(isCHS ? 2 : 1).joined(separator: "\t")
let varLineDataPost = varLineData.components(separatedBy: " ").suffix(isCHS ? 1 : 2).joined(separator: "\t") let varLineDataPost = lineData.components(separatedBy: " ").suffix(isCHS ? 1 : 2).joined(separator: "\t")
varLineData = varLineDataPre + "\t" + varLineDataPost varLineData = varLineDataPre + "\t" + varLineDataPost
let arrLineData = varLineData.components(separatedBy: " ") let arrLineData = varLineData.components(separatedBy: " ")
var varLineDataProcessed: String = "" var varLineDataProcessed: String = ""
@ -276,22 +257,15 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
// Tab to ASCII Space // Tab to ASCII Space
// ASCII // ASCII
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF strRAW.regReplace(pattern: #"(^ | $)"#, replaceWith: "") //
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // strRAW.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & Form Feed to LF,
if strRAW.prefix(1) == " " { // strRAW.regReplace(pattern: #"^(#.*|.*#WIN32.*)$"#, replaceWith: "") // #+ WIN32
strRAW.removeFirst()
}
if strRAW.suffix(1) == " " { //
strRAW.removeLast()
}
// //
let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String]) let arrData = Array(NSOrderedSet(array: strRAW.components(separatedBy: "\n")).array as! [String])
var varLineData: String = "" var varLineData: String = ""
for lineData in arrData { for lineData in arrData {
varLineData = lineData varLineData = lineData
// //
varLineData.regReplace(pattern: "^#.*$", replaceWith: "") // #
varLineData.regReplace(pattern: "^.*#WIN32.*$", replaceWith: "") // WIN32
varLineData = varLineData.components(separatedBy: " ").prefix(3).joined(separator: "\t") // varLineData = varLineData.components(separatedBy: " ").prefix(3).joined(separator: "\t") //
let arrLineData = varLineData.components(separatedBy: " ") let arrLineData = varLineData.components(separatedBy: " ")
var varLineDataProcessed: String = "" var varLineDataProcessed: String = ""