From 66933ca934086886558f22aa5d3aff8451fecc5b Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 26 Feb 2022 14:28:03 +0800 Subject: [PATCH] UPE // UTF8 Surrogate Pair Issue Fix. - We don't use RegEx to remove the trailing spaces of each lines anymore. Reason: Swift built-in RegEx relies on NSRange which has no UTF8 surrogate pair support, leading to incorrectly-calculated modification spot when dealing with lines that contains surrogate-pair characters. - Also optimized the format-consolidating process. --- UserPhraseEditor/StringExtension.swift | 33 +++++++++++++++----------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/UserPhraseEditor/StringExtension.swift b/UserPhraseEditor/StringExtension.swift index 8a7c703e..70faf0f0 100644 --- a/UserPhraseEditor/StringExtension.swift +++ b/UserPhraseEditor/StringExtension.swift @@ -28,26 +28,31 @@ extension String { } mutating func formatConsolidate(HYPY2BPMF: Bool) { // Step 1: Consolidating formats per line. - var arrData = self.components(separatedBy: "\n") - var varLineData = "" - var strProcessed = "" - for lineData in arrData { - varLineData = lineData - varLineData.regReplace(pattern: " ", replaceWith: " ") // CJKWhiteSpace to ASCIISpace - varLineData.regReplace(pattern: " ", replaceWith: " ") // NonBreakWhiteSpace to ASCIISpace - varLineData.regReplace(pattern: "\\s+", replaceWith: " ") // Consolidating Consecutive Spaves - varLineData.regReplace(pattern: "^\\s", replaceWith: "") // Trim Leading Space - varLineData.regReplace(pattern: "\\s$", replaceWith: "") // Trim Trailing Space - strProcessed += varLineData - strProcessed += "\n" + var strProcessed = self + // 預處理格式 + strProcessed = strProcessed.replacingOccurrences(of: " #MACOS", with: "") // 去掉 macOS 標記 + strProcessed = strProcessed.replacingOccurrences(of: " ", with: " ") // CJKWhiteSpace (\x{3000}) to ASCII Space + strProcessed = strProcessed.replacingOccurrences(of: " ", with: " ") // NonBreakWhiteSpace (\x{A0}) to ASCII Space + strProcessed = strProcessed.replacingOccurrences(of: "\t", with: " ") // Tab to ASCII Space + strProcessed.regReplace(pattern: "\\f", replaceWith: "\n") // Form Feed to LF + strProcessed = strProcessed.replacingOccurrences(of: "\r", with: "\n") // CR to LF + strProcessed.regReplace(pattern: " +", replaceWith: " ") // 統整連續空格為一個 ASCII 空格 + strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n") // 統整連續 LF 為一個 LF + strProcessed = strProcessed.replacingOccurrences(of: " \n", with: "\n") // 去除行尾空格 + strProcessed = strProcessed.replacingOccurrences(of: "\n ", with: "\n") // 去除行首空格 + if strProcessed.prefix(1) == " " { // 去除檔案開頭空格 + strProcessed.removeFirst() } - + if strProcessed.suffix(1) == " " { // 去除檔案結尾空格 + strProcessed.removeLast() + } + var arrData = [""] if HYPY2BPMF { // Step 0: Convert HanyuPinyin to Bopomofo. arrData = strProcessed.components(separatedBy: "\n") strProcessed = "" // Reset its value for lineData in arrData { - varLineData = lineData + var varLineData = lineData // 漢語拼音轉注音,得先從最長的可能的拼音組合開始轉起, // 這樣等轉換到更短的可能的漢語拼音組合時就不會出錯。 // 依此類推,聲調放在最後來轉換。