DataCompiler // Regex Optimization.
This commit is contained in:
parent
ece91e3e3f
commit
b461cae0c3
|
@ -118,16 +118,13 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
|
||||||
}
|
}
|
||||||
// 預處理格式
|
// 預處理格式
|
||||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{3000}", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{A0}", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
// Tab to ASCII Space
|
||||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
// 統整連續空格為一個 ASCII 空格
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
strRAW.regReplace(pattern: #"( +|\x{3000}+|\x{A0}+|\t+)+"#, replaceWith: " ")
|
||||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
|
||||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||||
strRAW.removeFirst()
|
strRAW.removeFirst()
|
||||||
}
|
}
|
||||||
|
@ -195,16 +192,13 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
|
||||||
}
|
}
|
||||||
// 預處理格式
|
// 預處理格式
|
||||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{3000}", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{A0}", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
// Tab to ASCII Space
|
||||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
// 統整連續空格為一個 ASCII 空格
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
strRAW.regReplace(pattern: #"( +|\x{3000}+|\x{A0}+|\t+)+"#, replaceWith: " ")
|
||||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
|
||||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||||
strRAW.removeFirst()
|
strRAW.removeFirst()
|
||||||
}
|
}
|
||||||
|
@ -277,16 +271,13 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
|
||||||
}
|
}
|
||||||
// 預處理格式
|
// 預處理格式
|
||||||
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{3000}", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space
|
// CJKWhiteSpace (\x{3000}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\u{A0}", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
// NonBreakWhiteSpace (\x{A0}) to ASCII Space
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space
|
// Tab to ASCII Space
|
||||||
strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF
|
// 統整連續空格為一個 ASCII 空格
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF
|
strRAW.regReplace(pattern: #"( +|\x{3000}+|\x{A0}+|\t+)+"#, replaceWith: " ")
|
||||||
strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格
|
strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF
|
||||||
// strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF
|
strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行
|
||||||
// (不需要處理純空行,因為空記錄不會被轉為 Entry)
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格
|
|
||||||
strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格
|
|
||||||
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
if strRAW.prefix(1) == " " { // 去除檔案開頭空格
|
||||||
strRAW.removeFirst()
|
strRAW.removeFirst()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue