From e735cd33f311d57c91690442e6062b9f98da25d8 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 3 Mar 2022 21:21:11 +0800 Subject: [PATCH] DataCompiler // Regex Optimization. --- DataCompiler/dataCompiler.swift | 51 ++++++++++++++------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/DataCompiler/dataCompiler.swift b/DataCompiler/dataCompiler.swift index 4128f4c6..dea6fa77 100644 --- a/DataCompiler/dataCompiler.swift +++ b/DataCompiler/dataCompiler.swift @@ -118,16 +118,13 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] { } // 預處理格式 strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記 - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space - strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF - strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF - strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格 - // strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF - // (不需要處理純空行,因為空記錄不會被轉為 Entry) - strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格 - strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格 + // CJKWhiteSpace (\x{3000}) to ASCII Space + // NonBreakWhiteSpace (\x{A0}) to ASCII Space + // Tab to ASCII Space + // 統整連續空格為一個 ASCII 空格 + strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") + strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF + strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行 if strRAW.prefix(1) == " " { // 去除檔案開頭空格 strRAW.removeFirst() } @@ -195,16 +192,13 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] { } // 預處理格式 strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記 - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space - strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF - strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF - strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格 - // strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF - // (不需要處理純空行,因為空記錄不會被轉為 Entry) - strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格 - strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格 + // CJKWhiteSpace (\x{3000}) to ASCII Space + // NonBreakWhiteSpace (\x{A0}) to ASCII Space + // Tab to ASCII Space + // 統整連續空格為一個 ASCII 空格 + strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") + strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF + strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行 if strRAW.prefix(1) == " " { // 去除檔案開頭空格 strRAW.removeFirst() } @@ -277,16 +271,13 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] { } // 預處理格式 strRAW = strRAW.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記 - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space - strRAW = strRAW.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space - strRAW.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF - strRAW = strRAW.replacingOccurrences(of: "\r", with: "\n") // CR to LF - strRAW.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格 - // strRAW.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF - // (不需要處理純空行,因為空記錄不會被轉為 Entry) - strRAW = strRAW.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格 - strRAW = strRAW.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格 + // CJKWhiteSpace (\x{3000}) to ASCII Space + // NonBreakWhiteSpace (\x{A0}) to ASCII Space + // Tab to ASCII Space + // 統整連續空格為一個 ASCII 空格 + strRAW.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") + strRAW.regReplace(pattern: #"(\f+|\r+)+"#, replaceWith: "\n") // CR & Form Feed to LF + strRAW.regReplace(pattern: #"(\n+| \n+|\n+ )"#, replaceWith: "\n") // 去除行尾行首空格與重複行 if strRAW.prefix(1) == " " { // 去除檔案開頭空格 strRAW.removeFirst() }