DataCompiler // Regex Optimization.
This commit is contained in:
parent
11572c23dc
commit
e735cd33f3
|
@ -118,16 +118,13 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
|
|||
}
|
||||
// 預處理格式
|
||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
||||
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
// Tab to ASCII Space
|
||||
// 統整連續空格為一個 ASCII 空格
|
||||
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
|
||||
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||
strRAW.removeFirst()
|
||||
}
|
||||
|
@ -195,16 +192,13 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
|
|||
}
|
||||
// 預處理格式
|
||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
||||
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
// Tab to ASCII Space
|
||||
// 統整連續空格為一個 ASCII 空格
|
||||
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
|
||||
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||
strRAW.removeFirst()
|
||||
}
|
||||
|
@ -277,16 +271,13 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
|
|||
}
|
||||
// 預處理格式
|
||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
||||
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||
// Tab to ASCII Space
|
||||
// 統整連續空格為一個 ASCII 空格
|
||||
strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ")
|
||||
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||
strRAW.removeFirst()
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue