diff --git a/Source/Modules/LangModelRelated/LMConsolidator.swift b/Source/Modules/LangModelRelated/LMConsolidator.swift index b9392b4f..3e16e6db 100644 --- a/Source/Modules/LangModelRelated/LMConsolidator.swift +++ b/Source/Modules/LangModelRelated/LMConsolidator.swift @@ -28,6 +28,9 @@ extension vChewing { public enum LMConsolidator { public static let kPragmaHeader = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍" + /// 檢查給定檔案的標頭是否正常。 + /// - Parameter path: 給定檔案路徑。 + /// - Returns: 結果正常則為真,其餘為假。 public static func checkPragma(path: String) -> Bool { if FileManager.default.fileExists(atPath: path) { let fileHandle = FileHandle(forReadingAtPath: path)! @@ -51,12 +54,17 @@ extension vChewing { return false } + /// 檢查檔案是否以空行結尾,如果缺失則補充之。 + /// - Parameter path: 給定檔案路徑。 + /// - Returns: 結果正常或修復順利則為真,其餘為假。 @discardableResult public static func fixEOF(path: String) -> Bool { let urlPath = URL(fileURLWithPath: path) if FileManager.default.fileExists(atPath: path) { var strIncoming = "" do { strIncoming += try String(contentsOf: urlPath, encoding: .utf8) + /// 注意:Swift 版 LMConsolidator 並未在此安排對 EOF 的去重複工序。 + /// 但這個函式執行完之後往往就會 consolidate() 整理格式,所以不會有差。 if !strIncoming.hasSuffix("\n") { IME.prtDebugIntel("EOF Fix Necessity Confirmed, Start Fixing.") if let writeFile = FileHandle(forUpdatingAtPath: path), @@ -81,10 +89,14 @@ extension vChewing { return false } + /// 統整給定的檔案的格式。 + /// - Parameters: + /// - path: 給定檔案路徑。 + /// - shouldCheckPragma: 是否在檔案標頭完好無損的情況下略過對格式的整理。 + /// - Returns: 若整理順利或無須整理,則為真;反之為假。 @discardableResult public static func consolidate(path: String, pragma shouldCheckPragma: Bool) -> Bool { - var pragmaResult = false + let pragmaResult = checkPragma(path: path) if shouldCheckPragma { - pragmaResult = checkPragma(path: path) if pragmaResult { return true } @@ -105,6 +117,7 @@ extension vChewing { strProcessed.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") // 去除行尾行首空格 strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") + strProcessed.regReplace(pattern: #"(\n | \n)"#, replaceWith: "\n") // CR & FF to LF, 且去除重複行 strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") if strProcessed.prefix(1) == " " { // 去除檔案開頭空格 @@ -114,21 +127,21 @@ extension vChewing { strProcessed.removeLast() } - // Step 3: Add Formatted Pragma, the Sorted Header: + // Step 2: Add Formatted Pragma, the Sorted Header: if !pragmaResult { strProcessed = kPragmaHeader + "\n" + strProcessed // Add Sorted Header } - // Step 4: Deduplication. + // Step 3: Deduplication. let arrData = strProcessed.split(separator: "\n") // 下面兩行的 reversed 是首尾顛倒,免得破壞最新的 override 資訊。 let arrDataDeduplicated = Array(NSOrderedSet(array: arrData.reversed()).array as! [String]) strProcessed = arrDataDeduplicated.reversed().joined(separator: "\n") + "\n" - // Step 5: Remove duplicated newlines at the end of the file. - strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n") + // Step 4: Remove duplicated newlines at the end of the file. + strProcessed.regReplace(pattern: #"\n+"#, replaceWith: "\n") - // Step 6: Write consolidated file contents. + // Step 5: Write consolidated file contents. try strProcessed.write(to: urlPath, atomically: false, encoding: .utf8) } catch { diff --git a/UserPhraseEditor/StringExtension.swift b/UserPhraseEditor/StringExtension.swift index 946852e8..747045a0 100644 --- a/UserPhraseEditor/StringExtension.swift +++ b/UserPhraseEditor/StringExtension.swift @@ -52,14 +52,18 @@ extension String { // Tab to ASCII Space // 統整連續空格為一個 ASCII 空格 strProcessed.regReplace(pattern: #"( +| +| +|\t+)+"#, replaceWith: " ") - strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") // 去除行尾行首空格 - strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") // CR & FF to LF, 且去除重複行 + // 去除行尾行首空格 + strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "") + strProcessed.regReplace(pattern: #"(\n | \n)"#, replaceWith: "\n") + // CR & FF to LF, 且去除重複行 + strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n") if strProcessed.prefix(1) == " " { // 去除檔案開頭空格 strProcessed.removeFirst() } if strProcessed.suffix(1) == " " { // 去除檔案結尾空格 strProcessed.removeLast() } + if cnvHYPYtoBPMF { // Step 2: Convert HanyuPinyin to Bopomofo. // 漢語拼音轉注音,得先從最長的可能的拼音組合開始轉起,