DataCompiler // Regex Optimization.

This commit is contained in:
ShikiSuen 2022-03-03 21:21:11 +08:00
parent 11572c23dc
commit e735cd33f3
1 changed file with 21 additions and 30 deletions

View File

@ -118,16 +118,13 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
}
//
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // macOS
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
strRAW.regReplace(pattern: " +", replaceWith: " ") // ASCII
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // LF LF
// ( Entry)
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") //
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") //
// CJKWhiteSpace (\x{3000}) to ASCII Space
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
// Tab to ASCII Space
// Collapse consecutive whitespace into a single ASCII space
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}
@ -195,16 +192,13 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
}
//
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // macOS
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
strRAW.regReplace(pattern: " +", replaceWith: " ") // ASCII
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // LF LF
// ( Entry)
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") //
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") //
// CJKWhiteSpace (\x{3000}) to ASCII Space
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
// Tab to ASCII Space
// Collapse consecutive whitespace into a single ASCII space
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}
@ -277,16 +271,13 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
}
//
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // macOS
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
strRAW.regReplace(pattern: " +", replaceWith: " ") // ASCII
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // LF LF
// ( Entry)
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") //
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") //
// CJKWhiteSpace (\x{3000}) to ASCII Space
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
// Tab to ASCII Space
// Collapse consecutive whitespace into a single ASCII space
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") //
if strRAW.prefix(1) == " " { //
strRAW.removeFirst()
}