From 7db65e6e2c268fa5cfa17ecbf5d2765b116bda54 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 14:23:24 +0800 Subject: [PATCH 1/9] Megrez // Weighten nodes with longer span (Megrez v1.0.7). --- .../LanguageParsers/Megrez/1_Walker.swift | 31 ++++++++++++++++--- .../LanguageParsers/Megrez/2_Grid.swift | 20 ++++++------ .../LanguageParsers/Megrez/3_NodeAnchor.swift | 6 ++++ .../LanguageParsers/Megrez/4_Node.swift | 4 ++- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/Source/Modules/LanguageParsers/Megrez/1_Walker.swift b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift index 7c2a5051..429d78b5 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_Walker.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift @@ -31,21 +31,37 @@ extension Megrez { mutGrid = grid } - public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] { + public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0, nodesLimit: Int = 0) + -> [NodeAnchor] + { if location == 0 || location > mutGrid.width() { return [] as [NodeAnchor] } var paths: [[NodeAnchor]] = [] - let nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location) + var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location) - for n in nodes { + nodes.sort { + $0.balancedScore > $1.balancedScore // 排序規則已經在 NodeAnchor 內定義了。 + } + + // 只檢查前 X 個 NodeAnchor 是否有 node。 + // 這裡有 abs 是為了防止有白癡填負數。 + var border: Int = nodes.count + if nodesLimit > 0 { + border = min(nodes.count, abs(nodesLimit)) + } + + for n in nodes[0..= 0 { + break + } } if !paths.isEmpty { diff --git a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift index c391b425..355c6bf8 100644 --- a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift +++ b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift @@ -140,16 +140,15 @@ extension Megrez { public func fixNodeSelectedCandidate(location: Int, value: String) -> 
NodeAnchor { var node = NodeAnchor() - let nodes = nodesCrossingOrEndingAt(location: location) - for nodeAnchor in nodes { + for (index, nodeAnchor) in nodesCrossingOrEndingAt(location: location).enumerated() { // Reset the candidate-fixed state of every node at the location. let candidates = nodeAnchor.node?.candidates() ?? [] - nodeAnchor.node?.resetCandidate() + nodesCrossingOrEndingAt(location: location)[index].node?.resetCandidate() for (i, candidate) in candidates.enumerated() { if candidate.value == value { - nodeAnchor.node?.selectCandidateAt(index: i) - node = nodeAnchor + nodesCrossingOrEndingAt(location: location)[index].node?.selectCandidateAt(index: i) + node = nodesCrossingOrEndingAt(location: location)[index] break } } @@ -158,18 +157,17 @@ extension Megrez { } public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) { - for nodeAnchor in nodesCrossingOrEndingAt(location: location) { - var nodeAnchor = nodeAnchor + for (index, nodeAnchor) in nodesCrossingOrEndingAt(location: location).enumerated() { if let theNode = nodeAnchor.node { let candidates = theNode.candidates() // Reset the candidate-fixed state of every node at the location. 
- theNode.resetCandidate() - nodeAnchor.node = theNode + nodesCrossingOrEndingAt(location: location)[index].node?.resetCandidate() for (i, candidate) in candidates.enumerated() { if candidate.value == value { - theNode.selectFloatingCandidateAt(index: i, score: overridingScore) - nodeAnchor.node = theNode + nodesCrossingOrEndingAt(location: location)[index].node?.selectFloatingCandidateAt( + index: i, score: overridingScore + ) break } } diff --git a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift index 0a130a58..938262f4 100644 --- a/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift +++ b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift @@ -32,5 +32,11 @@ extension Megrez { public var keyLength: Int { node?.key().count ?? 0 } + + public var balancedScore: Double { + let weightedScore: Double = (Double(spanningLength) - 1) * 2 + let nodeScore: Double = node?.score() ?? 0 + return weightedScore + nodeScore + } } } diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift index fdf0838d..be518b3e 100644 --- a/Source/Modules/LanguageParsers/Megrez/4_Node.swift +++ b/Source/Modules/LanguageParsers/Megrez/4_Node.swift @@ -36,6 +36,8 @@ extension Megrez { var mutCandidateFixed: Bool = false var mutSelectedUnigramIndex: Int = 0 + let kSelectedCandidateScore: Double = 99 + public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { mutLM = LanguageModel() @@ -112,7 +114,7 @@ extension Megrez { public func selectCandidateAt(index: Int = 0, fix: Bool = false) { mutSelectedUnigramIndex = index >= mutUnigrams.count ? 
0 : index mutCandidateFixed = fix - mutScore = 99 + mutScore = kSelectedCandidateScore } public func resetCandidate() { From 8984784bdbe7f20b2594d112ff79b1f17caa5a82 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 14:23:39 +0800 Subject: [PATCH 2/9] KeyHandler // Set the limit of walked nodes count. --- Source/Modules/ControllerModules/KeyHandler_Core.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 79e7cee3..cc8567fe 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -121,7 +121,7 @@ class KeyHandler: NSObject { let walker = Megrez.Walker(grid: _builder.grid()) // the reverse walk traces the grid from the end - let walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: _builder.grid().width()) + let walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: _builder.grid().width(), nodesLimit: 10) // then we use ".reversed()" to reverse the nodes so that we get the forward-walked nodes _walkedNodes.removeAll() From 788b9a60eee091e329448dfc014dc3c6c5441c3d Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 14:56:55 +0800 Subject: [PATCH 3/9] LMUserOverride // Fix wrong process in observe(). --- .../LangModelRelated/SubLMs/lmUserOverride.swift | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift index 24f74120..dd2157df 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift @@ -74,6 +74,9 @@ extension vChewing { public init(capacity: Int = 500, decayConstant: Double = 5400.0) { mutCapacity = abs(capacity) // Ensures that this value is always > 0. 
+ if mutCapacity == 0 { + mutCapacity = 1 + } mutDecayExponent = log(0.5) / decayConstant } @@ -88,7 +91,7 @@ extension vChewing { else { return } - guard let map = mutLRUMap[key] else { + guard mutLRUMap[key] != nil else { var observation: Observation = .init() observation.update(candidate: candidate, timestamp: timestamp) mutLRUMap[key] = KeyObservationPair(key: key, observation: observation) @@ -100,10 +103,11 @@ extension vChewing { } return } - var obs = map.observation - obs.update(candidate: candidate, timestamp: timestamp) - let pair = KeyObservationPair(key: key, observation: obs) - mutLRUList.insert(pair, at: 0) + mutLRUList.insert(contentsOf: mutLRUMap.values, at: 0) + + if mutLRUMap[key] != nil { + mutLRUMap[key]?.observation.update(candidate: candidate, timestamp: timestamp) + } } public func suggest( From 4f881e44eabf899328dba8c5145714f4d5f18def Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 18:10:35 +0800 Subject: [PATCH 4/9] LMs // Use split() to boost loading speed. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 非常感謝李昇輯老師的提議。換掉 components() 之後真的變得超快。 --- .../Modules/LangModelRelated/SubLMs/lmAssociates.swift | 8 ++++---- Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift | 10 +++++----- .../LangModelRelated/SubLMs/lmReplacements.swift | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index 495ca22d..e45554c6 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -53,9 +53,9 @@ extension vChewing { do { strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") strData.ranges(splitBy: "\n").forEach { - let neta = strData[$0].components(separatedBy: " ") + let neta = strData[$0].split(separator: " ") if neta.count >= 2 { - let theKey = neta[0] + let theKey = String(neta[0]) if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { let theValue = $0 rangeMap[theKey, default: []].append(theValue) @@ -94,8 +94,8 @@ extension vChewing { var pairs: [String] = [] if let arrRangeRecords: [Range] = rangeMap[key] { for netaRange in arrRangeRecords { - let neta = strData[netaRange].components(separatedBy: " ") - let theValue: String = neta[1] + let neta = strData[netaRange].split(separator: " ") + let theValue: String = String(neta[1]) pairs.append(theValue) } } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index 0f07eaaf..f9fb705e 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -71,9 +71,9 @@ extension vChewing { do { strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") strData.ranges(splitBy: "\n").forEach { - let neta 
= strData[$0].components(separatedBy: " ") + let neta = strData[$0].split(separator: " ") if neta.count >= 2 { - let theKey = shouldReverse ? neta[1] : neta[0] + let theKey = shouldReverse ? String(neta[1]) : String(neta[0]) if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { let theValue = $0 rangeMap[theKey, default: []].append(theValue) @@ -120,12 +120,12 @@ extension vChewing { var grams: [Megrez.Unigram] = [] if let arrRangeRecords: [Range] = rangeMap[key] { for netaRange in arrRangeRecords { - let neta = strData[netaRange].components(separatedBy: " ") - let theValue: String = shouldReverse ? neta[0] : neta[1] + let neta = strData[netaRange].split(separator: " ") + let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1]) let kvPair = Megrez.KeyValuePair(key: key, value: theValue) var theScore = defaultScore if neta.count >= 3, !shouldForceDefaultScore { - theScore = .init(neta[2]) ?? defaultScore + theScore = .init(String(neta[2])) ?? defaultScore } if theScore > 0 { theScore *= -1 // 應對可能忘記寫負號的情形 diff --git a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift index 989a7625..f70cb1d7 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift @@ -53,9 +53,9 @@ extension vChewing { do { strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") strData.ranges(splitBy: "\n").forEach { - let neta = strData[$0].components(separatedBy: " ") + let neta = strData[$0].split(separator: " ") if neta.count >= 2 { - let theKey = neta[0] + let theKey = String(neta[0]) if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { let theValue = $0 rangeMap[theKey] = theValue @@ -89,7 +89,7 @@ extension vChewing { guard let range = rangeMap[key] else { return "" } - let arrNeta = strData[range].components(separatedBy: " ") + let arrNeta = 
strData[range].split(separator: " ") guard arrNeta.count >= 2 else { return "" } From 481431c8e3473066096bc09a4158ff7a70956be4 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 18:35:18 +0800 Subject: [PATCH 5/9] UPE // Optimize data handling process. --- UserPhraseEditor/StringExtension.swift | 882 ++++++++++++------------- 1 file changed, 435 insertions(+), 447 deletions(-) diff --git a/UserPhraseEditor/StringExtension.swift b/UserPhraseEditor/StringExtension.swift index e05e1661..946852e8 100644 --- a/UserPhraseEditor/StringExtension.swift +++ b/UserPhraseEditor/StringExtension.swift @@ -60,448 +60,440 @@ extension String { if strProcessed.suffix(1) == " " { // 去除檔案結尾空格 strProcessed.removeLast() } - var arrData = [""] if cnvHYPYtoBPMF { - // Step 0: Convert HanyuPinyin to Bopomofo. - arrData = strProcessed.components(separatedBy: "\n") - strProcessed = "" // Reset its value - for lineData in arrData { - var varLineData = lineData - // 漢語拼音轉注音,得先從最長的可能的拼音組合開始轉起, - // 這樣等轉換到更短的可能的漢語拼音組合時就不會出錯。 - // 依此類推,聲調放在最後來轉換。 - varLineData.selfReplace("chuang", "ㄔㄨㄤ") - varLineData.selfReplace("shuang", "ㄕㄨㄤ") - varLineData.selfReplace("zhuang", "ㄓㄨㄤ") - varLineData.selfReplace("chang", "ㄔㄤ") - varLineData.selfReplace("cheng", "ㄔㄥ") - varLineData.selfReplace("chong", "ㄔㄨㄥ") - varLineData.selfReplace("chuai", "ㄔㄨㄞ") - varLineData.selfReplace("chuan", "ㄔㄨㄢ") - varLineData.selfReplace("guang", "ㄍㄨㄤ") - varLineData.selfReplace("huang", "ㄏㄨㄤ") - varLineData.selfReplace("jiang", "ㄐㄧㄤ") - varLineData.selfReplace("jiong", "ㄐㄩㄥ") - varLineData.selfReplace("kuang", "ㄎㄨㄤ") - varLineData.selfReplace("liang", "ㄌㄧㄤ") - varLineData.selfReplace("niang", "ㄋㄧㄤ") - varLineData.selfReplace("qiang", "ㄑㄧㄤ") - varLineData.selfReplace("qiong", "ㄑㄩㄥ") - varLineData.selfReplace("shang", "ㄕㄤ") - varLineData.selfReplace("sheng", "ㄕㄥ") - varLineData.selfReplace("shuai", "ㄕㄨㄞ") - varLineData.selfReplace("shuan", "ㄕㄨㄢ") - varLineData.selfReplace("xiang", "ㄒㄧㄤ") - 
varLineData.selfReplace("xiong", "ㄒㄩㄥ") - varLineData.selfReplace("zhang", "ㄓㄤ") - varLineData.selfReplace("zheng", "ㄓㄥ") - varLineData.selfReplace("zhong", "ㄓㄨㄥ") - varLineData.selfReplace("zhuai", "ㄓㄨㄞ") - varLineData.selfReplace("zhuan", "ㄓㄨㄢ") - varLineData.selfReplace("bang", "ㄅㄤ") - varLineData.selfReplace("beng", "ㄅㄥ") - varLineData.selfReplace("bian", "ㄅㄧㄢ") - varLineData.selfReplace("biao", "ㄅㄧㄠ") - varLineData.selfReplace("bing", "ㄅㄧㄥ") - varLineData.selfReplace("cang", "ㄘㄤ") - varLineData.selfReplace("ceng", "ㄘㄥ") - varLineData.selfReplace("chai", "ㄔㄞ") - varLineData.selfReplace("chan", "ㄔㄢ") - varLineData.selfReplace("chao", "ㄔㄠ") - varLineData.selfReplace("chen", "ㄔㄣ") - varLineData.selfReplace("chou", "ㄔㄡ") - varLineData.selfReplace("chua", "ㄔㄨㄚ") - varLineData.selfReplace("chui", "ㄔㄨㄟ") - varLineData.selfReplace("chun", "ㄔㄨㄣ") - varLineData.selfReplace("chuo", "ㄔㄨㄛ") - varLineData.selfReplace("cong", "ㄘㄨㄥ") - varLineData.selfReplace("cuan", "ㄘㄨㄢ") - varLineData.selfReplace("dang", "ㄉㄤ") - varLineData.selfReplace("deng", "ㄉㄥ") - varLineData.selfReplace("dian", "ㄉㄧㄢ") - varLineData.selfReplace("diao", "ㄉㄧㄠ") - varLineData.selfReplace("ding", "ㄉㄧㄥ") - varLineData.selfReplace("dong", "ㄉㄨㄥ") - varLineData.selfReplace("duan", "ㄉㄨㄢ") - varLineData.selfReplace("fang", "ㄈㄤ") - varLineData.selfReplace("feng", "ㄈㄥ") - varLineData.selfReplace("fiao", "ㄈㄧㄠ") - varLineData.selfReplace("fong", "ㄈㄨㄥ") - varLineData.selfReplace("gang", "ㄍㄤ") - varLineData.selfReplace("geng", "ㄍㄥ") - varLineData.selfReplace("giao", "ㄍㄧㄠ") - varLineData.selfReplace("gong", "ㄍㄨㄥ") - varLineData.selfReplace("guai", "ㄍㄨㄞ") - varLineData.selfReplace("guan", "ㄍㄨㄢ") - varLineData.selfReplace("hang", "ㄏㄤ") - varLineData.selfReplace("heng", "ㄏㄥ") - varLineData.selfReplace("hong", "ㄏㄨㄥ") - varLineData.selfReplace("huai", "ㄏㄨㄞ") - varLineData.selfReplace("huan", "ㄏㄨㄢ") - varLineData.selfReplace("jian", "ㄐㄧㄢ") - varLineData.selfReplace("jiao", "ㄐㄧㄠ") - varLineData.selfReplace("jing", "ㄐㄧㄥ") - 
varLineData.selfReplace("juan", "ㄐㄩㄢ") - varLineData.selfReplace("kang", "ㄎㄤ") - varLineData.selfReplace("keng", "ㄎㄥ") - varLineData.selfReplace("kong", "ㄎㄨㄥ") - varLineData.selfReplace("kuai", "ㄎㄨㄞ") - varLineData.selfReplace("kuan", "ㄎㄨㄢ") - varLineData.selfReplace("lang", "ㄌㄤ") - varLineData.selfReplace("leng", "ㄌㄥ") - varLineData.selfReplace("lian", "ㄌㄧㄢ") - varLineData.selfReplace("liao", "ㄌㄧㄠ") - varLineData.selfReplace("ling", "ㄌㄧㄥ") - varLineData.selfReplace("long", "ㄌㄨㄥ") - varLineData.selfReplace("luan", "ㄌㄨㄢ") - varLineData.selfReplace("lvan", "ㄌㄩㄢ") - varLineData.selfReplace("mang", "ㄇㄤ") - varLineData.selfReplace("meng", "ㄇㄥ") - varLineData.selfReplace("mian", "ㄇㄧㄢ") - varLineData.selfReplace("miao", "ㄇㄧㄠ") - varLineData.selfReplace("ming", "ㄇㄧㄥ") - varLineData.selfReplace("nang", "ㄋㄤ") - varLineData.selfReplace("neng", "ㄋㄥ") - varLineData.selfReplace("nian", "ㄋㄧㄢ") - varLineData.selfReplace("niao", "ㄋㄧㄠ") - varLineData.selfReplace("ning", "ㄋㄧㄥ") - varLineData.selfReplace("nong", "ㄋㄨㄥ") - varLineData.selfReplace("nuan", "ㄋㄨㄢ") - varLineData.selfReplace("pang", "ㄆㄤ") - varLineData.selfReplace("peng", "ㄆㄥ") - varLineData.selfReplace("pian", "ㄆㄧㄢ") - varLineData.selfReplace("piao", "ㄆㄧㄠ") - varLineData.selfReplace("ping", "ㄆㄧㄥ") - varLineData.selfReplace("qian", "ㄑㄧㄢ") - varLineData.selfReplace("qiao", "ㄑㄧㄠ") - varLineData.selfReplace("qing", "ㄑㄧㄥ") - varLineData.selfReplace("quan", "ㄑㄩㄢ") - varLineData.selfReplace("rang", "ㄖㄤ") - varLineData.selfReplace("reng", "ㄖㄥ") - varLineData.selfReplace("rong", "ㄖㄨㄥ") - varLineData.selfReplace("ruan", "ㄖㄨㄢ") - varLineData.selfReplace("sang", "ㄙㄤ") - varLineData.selfReplace("seng", "ㄙㄥ") - varLineData.selfReplace("shai", "ㄕㄞ") - varLineData.selfReplace("shan", "ㄕㄢ") - varLineData.selfReplace("shao", "ㄕㄠ") - varLineData.selfReplace("shei", "ㄕㄟ") - varLineData.selfReplace("shen", "ㄕㄣ") - varLineData.selfReplace("shou", "ㄕㄡ") - varLineData.selfReplace("shua", "ㄕㄨㄚ") - varLineData.selfReplace("shui", "ㄕㄨㄟ") - 
varLineData.selfReplace("shun", "ㄕㄨㄣ") - varLineData.selfReplace("shuo", "ㄕㄨㄛ") - varLineData.selfReplace("song", "ㄙㄨㄥ") - varLineData.selfReplace("suan", "ㄙㄨㄢ") - varLineData.selfReplace("tang", "ㄊㄤ") - varLineData.selfReplace("teng", "ㄊㄥ") - varLineData.selfReplace("tian", "ㄊㄧㄢ") - varLineData.selfReplace("tiao", "ㄊㄧㄠ") - varLineData.selfReplace("ting", "ㄊㄧㄥ") - varLineData.selfReplace("tong", "ㄊㄨㄥ") - varLineData.selfReplace("tuan", "ㄊㄨㄢ") - varLineData.selfReplace("wang", "ㄨㄤ") - varLineData.selfReplace("weng", "ㄨㄥ") - varLineData.selfReplace("xian", "ㄒㄧㄢ") - varLineData.selfReplace("xiao", "ㄒㄧㄠ") - varLineData.selfReplace("xing", "ㄒㄧㄥ") - varLineData.selfReplace("xuan", "ㄒㄩㄢ") - varLineData.selfReplace("yang", "ㄧㄤ") - varLineData.selfReplace("ying", "ㄧㄥ") - varLineData.selfReplace("yong", "ㄩㄥ") - varLineData.selfReplace("yuan", "ㄩㄢ") - varLineData.selfReplace("zang", "ㄗㄤ") - varLineData.selfReplace("zeng", "ㄗㄥ") - varLineData.selfReplace("zhai", "ㄓㄞ") - varLineData.selfReplace("zhan", "ㄓㄢ") - varLineData.selfReplace("zhao", "ㄓㄠ") - varLineData.selfReplace("zhei", "ㄓㄟ") - varLineData.selfReplace("zhen", "ㄓㄣ") - varLineData.selfReplace("zhou", "ㄓㄡ") - varLineData.selfReplace("zhua", "ㄓㄨㄚ") - varLineData.selfReplace("zhui", "ㄓㄨㄟ") - varLineData.selfReplace("zhun", "ㄓㄨㄣ") - varLineData.selfReplace("zhuo", "ㄓㄨㄛ") - varLineData.selfReplace("zong", "ㄗㄨㄥ") - varLineData.selfReplace("zuan", "ㄗㄨㄢ") - varLineData.selfReplace("jun", "ㄐㄩㄣ") - varLineData.selfReplace("ang", "ㄤ") - varLineData.selfReplace("bai", "ㄅㄞ") - varLineData.selfReplace("ban", "ㄅㄢ") - varLineData.selfReplace("bao", "ㄅㄠ") - varLineData.selfReplace("bei", "ㄅㄟ") - varLineData.selfReplace("ben", "ㄅㄣ") - varLineData.selfReplace("bie", "ㄅㄧㄝ") - varLineData.selfReplace("bin", "ㄅㄧㄣ") - varLineData.selfReplace("cai", "ㄘㄞ") - varLineData.selfReplace("can", "ㄘㄢ") - varLineData.selfReplace("cao", "ㄘㄠ") - varLineData.selfReplace("cei", "ㄘㄟ") - varLineData.selfReplace("cen", "ㄘㄣ") - varLineData.selfReplace("cha", 
"ㄔㄚ") - varLineData.selfReplace("che", "ㄔㄜ") - varLineData.selfReplace("chi", "ㄔ") - varLineData.selfReplace("chu", "ㄔㄨ") - varLineData.selfReplace("cou", "ㄘㄡ") - varLineData.selfReplace("cui", "ㄘㄨㄟ") - varLineData.selfReplace("cun", "ㄘㄨㄣ") - varLineData.selfReplace("cuo", "ㄘㄨㄛ") - varLineData.selfReplace("dai", "ㄉㄞ") - varLineData.selfReplace("dan", "ㄉㄢ") - varLineData.selfReplace("dao", "ㄉㄠ") - varLineData.selfReplace("dei", "ㄉㄟ") - varLineData.selfReplace("den", "ㄉㄣ") - varLineData.selfReplace("dia", "ㄉㄧㄚ") - varLineData.selfReplace("die", "ㄉㄧㄝ") - varLineData.selfReplace("diu", "ㄉㄧㄡ") - varLineData.selfReplace("dou", "ㄉㄡ") - varLineData.selfReplace("dui", "ㄉㄨㄟ") - varLineData.selfReplace("dun", "ㄉㄨㄣ") - varLineData.selfReplace("duo", "ㄉㄨㄛ") - varLineData.selfReplace("eng", "ㄥ") - varLineData.selfReplace("fan", "ㄈㄢ") - varLineData.selfReplace("fei", "ㄈㄟ") - varLineData.selfReplace("fen", "ㄈㄣ") - varLineData.selfReplace("fou", "ㄈㄡ") - varLineData.selfReplace("gai", "ㄍㄞ") - varLineData.selfReplace("gan", "ㄍㄢ") - varLineData.selfReplace("gao", "ㄍㄠ") - varLineData.selfReplace("gei", "ㄍㄟ") - varLineData.selfReplace("gin", "ㄍㄧㄣ") - varLineData.selfReplace("gen", "ㄍㄣ") - varLineData.selfReplace("gou", "ㄍㄡ") - varLineData.selfReplace("gua", "ㄍㄨㄚ") - varLineData.selfReplace("gue", "ㄍㄨㄜ") - varLineData.selfReplace("gui", "ㄍㄨㄟ") - varLineData.selfReplace("gun", "ㄍㄨㄣ") - varLineData.selfReplace("guo", "ㄍㄨㄛ") - varLineData.selfReplace("hai", "ㄏㄞ") - varLineData.selfReplace("han", "ㄏㄢ") - varLineData.selfReplace("hao", "ㄏㄠ") - varLineData.selfReplace("hei", "ㄏㄟ") - varLineData.selfReplace("hen", "ㄏㄣ") - varLineData.selfReplace("hou", "ㄏㄡ") - varLineData.selfReplace("hua", "ㄏㄨㄚ") - varLineData.selfReplace("hui", "ㄏㄨㄟ") - varLineData.selfReplace("hun", "ㄏㄨㄣ") - varLineData.selfReplace("huo", "ㄏㄨㄛ") - varLineData.selfReplace("jia", "ㄐㄧㄚ") - varLineData.selfReplace("jie", "ㄐㄧㄝ") - varLineData.selfReplace("jin", "ㄐㄧㄣ") - varLineData.selfReplace("jiu", "ㄐㄧㄡ") - 
varLineData.selfReplace("jue", "ㄐㄩㄝ") - varLineData.selfReplace("kai", "ㄎㄞ") - varLineData.selfReplace("kan", "ㄎㄢ") - varLineData.selfReplace("kao", "ㄎㄠ") - varLineData.selfReplace("ken", "ㄎㄣ") - varLineData.selfReplace("kiu", "ㄎㄧㄡ") - varLineData.selfReplace("kou", "ㄎㄡ") - varLineData.selfReplace("kua", "ㄎㄨㄚ") - varLineData.selfReplace("kui", "ㄎㄨㄟ") - varLineData.selfReplace("kun", "ㄎㄨㄣ") - varLineData.selfReplace("kuo", "ㄎㄨㄛ") - varLineData.selfReplace("lai", "ㄌㄞ") - varLineData.selfReplace("lan", "ㄌㄢ") - varLineData.selfReplace("lao", "ㄌㄠ") - varLineData.selfReplace("lei", "ㄌㄟ") - varLineData.selfReplace("lia", "ㄌㄧㄚ") - varLineData.selfReplace("lie", "ㄌㄧㄝ") - varLineData.selfReplace("lin", "ㄌㄧㄣ") - varLineData.selfReplace("liu", "ㄌㄧㄡ") - varLineData.selfReplace("lou", "ㄌㄡ") - varLineData.selfReplace("lun", "ㄌㄨㄣ") - varLineData.selfReplace("luo", "ㄌㄨㄛ") - varLineData.selfReplace("lve", "ㄌㄩㄝ") - varLineData.selfReplace("mai", "ㄇㄞ") - varLineData.selfReplace("man", "ㄇㄢ") - varLineData.selfReplace("mao", "ㄇㄠ") - varLineData.selfReplace("mei", "ㄇㄟ") - varLineData.selfReplace("men", "ㄇㄣ") - varLineData.selfReplace("mie", "ㄇㄧㄝ") - varLineData.selfReplace("min", "ㄇㄧㄣ") - varLineData.selfReplace("miu", "ㄇㄧㄡ") - varLineData.selfReplace("mou", "ㄇㄡ") - varLineData.selfReplace("nai", "ㄋㄞ") - varLineData.selfReplace("nan", "ㄋㄢ") - varLineData.selfReplace("nao", "ㄋㄠ") - varLineData.selfReplace("nei", "ㄋㄟ") - varLineData.selfReplace("nen", "ㄋㄣ") - varLineData.selfReplace("nie", "ㄋㄧㄝ") - varLineData.selfReplace("nin", "ㄋㄧㄣ") - varLineData.selfReplace("niu", "ㄋㄧㄡ") - varLineData.selfReplace("nou", "ㄋㄡ") - varLineData.selfReplace("nui", "ㄋㄨㄟ") - varLineData.selfReplace("nun", "ㄋㄨㄣ") - varLineData.selfReplace("nuo", "ㄋㄨㄛ") - varLineData.selfReplace("nve", "ㄋㄩㄝ") - varLineData.selfReplace("pai", "ㄆㄞ") - varLineData.selfReplace("pan", "ㄆㄢ") - varLineData.selfReplace("pao", "ㄆㄠ") - varLineData.selfReplace("pei", "ㄆㄟ") - varLineData.selfReplace("pen", "ㄆㄣ") - 
varLineData.selfReplace("pia", "ㄆㄧㄚ") - varLineData.selfReplace("pie", "ㄆㄧㄝ") - varLineData.selfReplace("pin", "ㄆㄧㄣ") - varLineData.selfReplace("pou", "ㄆㄡ") - varLineData.selfReplace("qia", "ㄑㄧㄚ") - varLineData.selfReplace("qie", "ㄑㄧㄝ") - varLineData.selfReplace("qin", "ㄑㄧㄣ") - varLineData.selfReplace("qiu", "ㄑㄧㄡ") - varLineData.selfReplace("que", "ㄑㄩㄝ") - varLineData.selfReplace("qun", "ㄑㄩㄣ") - varLineData.selfReplace("ran", "ㄖㄢ") - varLineData.selfReplace("rao", "ㄖㄠ") - varLineData.selfReplace("ren", "ㄖㄣ") - varLineData.selfReplace("rou", "ㄖㄡ") - varLineData.selfReplace("rui", "ㄖㄨㄟ") - varLineData.selfReplace("run", "ㄖㄨㄣ") - varLineData.selfReplace("ruo", "ㄖㄨㄛ") - varLineData.selfReplace("sai", "ㄙㄞ") - varLineData.selfReplace("san", "ㄙㄢ") - varLineData.selfReplace("sao", "ㄙㄠ") - varLineData.selfReplace("sei", "ㄙㄟ") - varLineData.selfReplace("sen", "ㄙㄣ") - varLineData.selfReplace("sha", "ㄕㄚ") - varLineData.selfReplace("she", "ㄕㄜ") - varLineData.selfReplace("shi", "ㄕ") - varLineData.selfReplace("shu", "ㄕㄨ") - varLineData.selfReplace("sou", "ㄙㄡ") - varLineData.selfReplace("sui", "ㄙㄨㄟ") - varLineData.selfReplace("sun", "ㄙㄨㄣ") - varLineData.selfReplace("suo", "ㄙㄨㄛ") - varLineData.selfReplace("tai", "ㄊㄞ") - varLineData.selfReplace("tan", "ㄊㄢ") - varLineData.selfReplace("tao", "ㄊㄠ") - varLineData.selfReplace("tie", "ㄊㄧㄝ") - varLineData.selfReplace("tou", "ㄊㄡ") - varLineData.selfReplace("tui", "ㄊㄨㄟ") - varLineData.selfReplace("tun", "ㄊㄨㄣ") - varLineData.selfReplace("tuo", "ㄊㄨㄛ") - varLineData.selfReplace("wai", "ㄨㄞ") - varLineData.selfReplace("wan", "ㄨㄢ") - varLineData.selfReplace("wei", "ㄨㄟ") - varLineData.selfReplace("wen", "ㄨㄣ") - varLineData.selfReplace("xia", "ㄒㄧㄚ") - varLineData.selfReplace("xie", "ㄒㄧㄝ") - varLineData.selfReplace("xin", "ㄒㄧㄣ") - varLineData.selfReplace("xiu", "ㄒㄧㄡ") - varLineData.selfReplace("xue", "ㄒㄩㄝ") - varLineData.selfReplace("xun", "ㄒㄩㄣ") - varLineData.selfReplace("yai", "ㄧㄞ") - varLineData.selfReplace("yan", "ㄧㄢ") - 
varLineData.selfReplace("yao", "ㄧㄠ") - varLineData.selfReplace("yin", "ㄧㄣ") - varLineData.selfReplace("you", "ㄧㄡ") - varLineData.selfReplace("yue", "ㄩㄝ") - varLineData.selfReplace("yun", "ㄩㄣ") - varLineData.selfReplace("zai", "ㄗㄞ") - varLineData.selfReplace("zan", "ㄗㄢ") - varLineData.selfReplace("zao", "ㄗㄠ") - varLineData.selfReplace("zei", "ㄗㄟ") - varLineData.selfReplace("zen", "ㄗㄣ") - varLineData.selfReplace("zha", "ㄓㄚ") - varLineData.selfReplace("zhe", "ㄓㄜ") - varLineData.selfReplace("zhi", "ㄓ") - varLineData.selfReplace("zhu", "ㄓㄨ") - varLineData.selfReplace("zou", "ㄗㄡ") - varLineData.selfReplace("zui", "ㄗㄨㄟ") - varLineData.selfReplace("zun", "ㄗㄨㄣ") - varLineData.selfReplace("zuo", "ㄗㄨㄛ") - varLineData.selfReplace("ai", "ㄞ") - varLineData.selfReplace("an", "ㄢ") - varLineData.selfReplace("ao", "ㄠ") - varLineData.selfReplace("ba", "ㄅㄚ") - varLineData.selfReplace("bi", "ㄅㄧ") - varLineData.selfReplace("bo", "ㄅㄛ") - varLineData.selfReplace("bu", "ㄅㄨ") - varLineData.selfReplace("ca", "ㄘㄚ") - varLineData.selfReplace("ce", "ㄘㄜ") - varLineData.selfReplace("ci", "ㄘ") - varLineData.selfReplace("cu", "ㄘㄨ") - varLineData.selfReplace("da", "ㄉㄚ") - varLineData.selfReplace("de", "ㄉㄜ") - varLineData.selfReplace("di", "ㄉㄧ") - varLineData.selfReplace("du", "ㄉㄨ") - varLineData.selfReplace("eh", "ㄝ") - varLineData.selfReplace("ei", "ㄟ") - varLineData.selfReplace("en", "ㄣ") - varLineData.selfReplace("er", "ㄦ") - varLineData.selfReplace("fa", "ㄈㄚ") - varLineData.selfReplace("fo", "ㄈㄛ") - varLineData.selfReplace("fu", "ㄈㄨ") - varLineData.selfReplace("ga", "ㄍㄚ") - varLineData.selfReplace("ge", "ㄍㄜ") - varLineData.selfReplace("gi", "ㄍㄧ") - varLineData.selfReplace("gu", "ㄍㄨ") - varLineData.selfReplace("ha", "ㄏㄚ") - varLineData.selfReplace("he", "ㄏㄜ") - varLineData.selfReplace("hu", "ㄏㄨ") - varLineData.selfReplace("ji", "ㄐㄧ") - varLineData.selfReplace("ju", "ㄐㄩ") - varLineData.selfReplace("ka", "ㄎㄚ") - varLineData.selfReplace("ke", "ㄎㄜ") - varLineData.selfReplace("ku", "ㄎㄨ") - 
varLineData.selfReplace("la", "ㄌㄚ") - varLineData.selfReplace("le", "ㄌㄜ") - varLineData.selfReplace("li", "ㄌㄧ") - varLineData.selfReplace("lo", "ㄌㄛ") - varLineData.selfReplace("lu", "ㄌㄨ") - varLineData.selfReplace("lv", "ㄌㄩ") - varLineData.selfReplace("ma", "ㄇㄚ") - varLineData.selfReplace("me", "ㄇㄜ") - varLineData.selfReplace("mi", "ㄇㄧ") - varLineData.selfReplace("mo", "ㄇㄛ") - varLineData.selfReplace("mu", "ㄇㄨ") - varLineData.selfReplace("na", "ㄋㄚ") - varLineData.selfReplace("ne", "ㄋㄜ") - varLineData.selfReplace("ni", "ㄋㄧ") - varLineData.selfReplace("nu", "ㄋㄨ") - varLineData.selfReplace("nv", "ㄋㄩ") - varLineData.selfReplace("ou", "ㄡ") - varLineData.selfReplace("pa", "ㄆㄚ") - varLineData.selfReplace("pi", "ㄆㄧ") - varLineData.selfReplace("po", "ㄆㄛ") - varLineData.selfReplace("pu", "ㄆㄨ") - varLineData.selfReplace("qi", "ㄑㄧ") - varLineData.selfReplace("qu", "ㄑㄩ") - varLineData.selfReplace("re", "ㄖㄜ") - varLineData.selfReplace("ri", "ㄖ") - varLineData.selfReplace("ru", "ㄖㄨ") - varLineData.selfReplace("sa", "ㄙㄚ") - varLineData.selfReplace("se", "ㄙㄜ") - varLineData.selfReplace("si", "ㄙ") - varLineData.selfReplace("su", "ㄙㄨ") - varLineData.selfReplace("ta", "ㄊㄚ") - varLineData.selfReplace("te", "ㄊㄜ") - varLineData.selfReplace("ti", "ㄊㄧ") - varLineData.selfReplace("tu", "ㄊㄨ") - varLineData.selfReplace("wa", "ㄨㄚ") - varLineData.selfReplace("wo", "ㄨㄛ") - varLineData.selfReplace("wu", "ㄨ") - varLineData.selfReplace("xi", "ㄒㄧ") - varLineData.selfReplace("xu", "ㄒㄩ") - varLineData.selfReplace("ya", "ㄧㄚ") - varLineData.selfReplace("ye", "ㄧㄝ") - varLineData.selfReplace("yi", "ㄧ") - varLineData.selfReplace("yo", "ㄧㄛ") - varLineData.selfReplace("yu", "ㄩ") - varLineData.selfReplace("za", "ㄗㄚ") - varLineData.selfReplace("ze", "ㄗㄜ") - varLineData.selfReplace("zi", "ㄗ") - varLineData.selfReplace("zu", "ㄗㄨ") - varLineData.selfReplace("a", "ㄚ") - varLineData.selfReplace("e", "ㄜ") - varLineData.selfReplace("o", "ㄛ") - varLineData.selfReplace("q", "ㄑ") - varLineData.selfReplace("2", "ˊ") - 
varLineData.selfReplace("3", "ˇ") - varLineData.selfReplace("4", "ˋ") - varLineData.selfReplace("5", "˙") - varLineData.selfReplace("1", "") - strProcessed += varLineData - strProcessed += "\n" - } + // Step 2: Convert HanyuPinyin to Bopomofo. + // 漢語拼音轉注音,得先從最長的可能的拼音組合開始轉起, + // 這樣等轉換到更短的可能的漢語拼音組合時就不會出錯。 + // 依此類推,聲調放在最後來轉換。 + strProcessed.selfReplace("chuang", "ㄔㄨㄤ") + strProcessed.selfReplace("shuang", "ㄕㄨㄤ") + strProcessed.selfReplace("zhuang", "ㄓㄨㄤ") + strProcessed.selfReplace("chang", "ㄔㄤ") + strProcessed.selfReplace("cheng", "ㄔㄥ") + strProcessed.selfReplace("chong", "ㄔㄨㄥ") + strProcessed.selfReplace("chuai", "ㄔㄨㄞ") + strProcessed.selfReplace("chuan", "ㄔㄨㄢ") + strProcessed.selfReplace("guang", "ㄍㄨㄤ") + strProcessed.selfReplace("huang", "ㄏㄨㄤ") + strProcessed.selfReplace("jiang", "ㄐㄧㄤ") + strProcessed.selfReplace("jiong", "ㄐㄩㄥ") + strProcessed.selfReplace("kuang", "ㄎㄨㄤ") + strProcessed.selfReplace("liang", "ㄌㄧㄤ") + strProcessed.selfReplace("niang", "ㄋㄧㄤ") + strProcessed.selfReplace("qiang", "ㄑㄧㄤ") + strProcessed.selfReplace("qiong", "ㄑㄩㄥ") + strProcessed.selfReplace("shang", "ㄕㄤ") + strProcessed.selfReplace("sheng", "ㄕㄥ") + strProcessed.selfReplace("shuai", "ㄕㄨㄞ") + strProcessed.selfReplace("shuan", "ㄕㄨㄢ") + strProcessed.selfReplace("xiang", "ㄒㄧㄤ") + strProcessed.selfReplace("xiong", "ㄒㄩㄥ") + strProcessed.selfReplace("zhang", "ㄓㄤ") + strProcessed.selfReplace("zheng", "ㄓㄥ") + strProcessed.selfReplace("zhong", "ㄓㄨㄥ") + strProcessed.selfReplace("zhuai", "ㄓㄨㄞ") + strProcessed.selfReplace("zhuan", "ㄓㄨㄢ") + strProcessed.selfReplace("bang", "ㄅㄤ") + strProcessed.selfReplace("beng", "ㄅㄥ") + strProcessed.selfReplace("bian", "ㄅㄧㄢ") + strProcessed.selfReplace("biao", "ㄅㄧㄠ") + strProcessed.selfReplace("bing", "ㄅㄧㄥ") + strProcessed.selfReplace("cang", "ㄘㄤ") + strProcessed.selfReplace("ceng", "ㄘㄥ") + strProcessed.selfReplace("chai", "ㄔㄞ") + strProcessed.selfReplace("chan", "ㄔㄢ") + strProcessed.selfReplace("chao", "ㄔㄠ") + strProcessed.selfReplace("chen", "ㄔㄣ") + 
strProcessed.selfReplace("chou", "ㄔㄡ") + strProcessed.selfReplace("chua", "ㄔㄨㄚ") + strProcessed.selfReplace("chui", "ㄔㄨㄟ") + strProcessed.selfReplace("chun", "ㄔㄨㄣ") + strProcessed.selfReplace("chuo", "ㄔㄨㄛ") + strProcessed.selfReplace("cong", "ㄘㄨㄥ") + strProcessed.selfReplace("cuan", "ㄘㄨㄢ") + strProcessed.selfReplace("dang", "ㄉㄤ") + strProcessed.selfReplace("deng", "ㄉㄥ") + strProcessed.selfReplace("dian", "ㄉㄧㄢ") + strProcessed.selfReplace("diao", "ㄉㄧㄠ") + strProcessed.selfReplace("ding", "ㄉㄧㄥ") + strProcessed.selfReplace("dong", "ㄉㄨㄥ") + strProcessed.selfReplace("duan", "ㄉㄨㄢ") + strProcessed.selfReplace("fang", "ㄈㄤ") + strProcessed.selfReplace("feng", "ㄈㄥ") + strProcessed.selfReplace("fiao", "ㄈㄧㄠ") + strProcessed.selfReplace("fong", "ㄈㄨㄥ") + strProcessed.selfReplace("gang", "ㄍㄤ") + strProcessed.selfReplace("geng", "ㄍㄥ") + strProcessed.selfReplace("giao", "ㄍㄧㄠ") + strProcessed.selfReplace("gong", "ㄍㄨㄥ") + strProcessed.selfReplace("guai", "ㄍㄨㄞ") + strProcessed.selfReplace("guan", "ㄍㄨㄢ") + strProcessed.selfReplace("hang", "ㄏㄤ") + strProcessed.selfReplace("heng", "ㄏㄥ") + strProcessed.selfReplace("hong", "ㄏㄨㄥ") + strProcessed.selfReplace("huai", "ㄏㄨㄞ") + strProcessed.selfReplace("huan", "ㄏㄨㄢ") + strProcessed.selfReplace("jian", "ㄐㄧㄢ") + strProcessed.selfReplace("jiao", "ㄐㄧㄠ") + strProcessed.selfReplace("jing", "ㄐㄧㄥ") + strProcessed.selfReplace("juan", "ㄐㄩㄢ") + strProcessed.selfReplace("kang", "ㄎㄤ") + strProcessed.selfReplace("keng", "ㄎㄥ") + strProcessed.selfReplace("kong", "ㄎㄨㄥ") + strProcessed.selfReplace("kuai", "ㄎㄨㄞ") + strProcessed.selfReplace("kuan", "ㄎㄨㄢ") + strProcessed.selfReplace("lang", "ㄌㄤ") + strProcessed.selfReplace("leng", "ㄌㄥ") + strProcessed.selfReplace("lian", "ㄌㄧㄢ") + strProcessed.selfReplace("liao", "ㄌㄧㄠ") + strProcessed.selfReplace("ling", "ㄌㄧㄥ") + strProcessed.selfReplace("long", "ㄌㄨㄥ") + strProcessed.selfReplace("luan", "ㄌㄨㄢ") + strProcessed.selfReplace("lvan", "ㄌㄩㄢ") + strProcessed.selfReplace("mang", "ㄇㄤ") + strProcessed.selfReplace("meng", "ㄇㄥ") 
+ strProcessed.selfReplace("mian", "ㄇㄧㄢ") + strProcessed.selfReplace("miao", "ㄇㄧㄠ") + strProcessed.selfReplace("ming", "ㄇㄧㄥ") + strProcessed.selfReplace("nang", "ㄋㄤ") + strProcessed.selfReplace("neng", "ㄋㄥ") + strProcessed.selfReplace("nian", "ㄋㄧㄢ") + strProcessed.selfReplace("niao", "ㄋㄧㄠ") + strProcessed.selfReplace("ning", "ㄋㄧㄥ") + strProcessed.selfReplace("nong", "ㄋㄨㄥ") + strProcessed.selfReplace("nuan", "ㄋㄨㄢ") + strProcessed.selfReplace("pang", "ㄆㄤ") + strProcessed.selfReplace("peng", "ㄆㄥ") + strProcessed.selfReplace("pian", "ㄆㄧㄢ") + strProcessed.selfReplace("piao", "ㄆㄧㄠ") + strProcessed.selfReplace("ping", "ㄆㄧㄥ") + strProcessed.selfReplace("qian", "ㄑㄧㄢ") + strProcessed.selfReplace("qiao", "ㄑㄧㄠ") + strProcessed.selfReplace("qing", "ㄑㄧㄥ") + strProcessed.selfReplace("quan", "ㄑㄩㄢ") + strProcessed.selfReplace("rang", "ㄖㄤ") + strProcessed.selfReplace("reng", "ㄖㄥ") + strProcessed.selfReplace("rong", "ㄖㄨㄥ") + strProcessed.selfReplace("ruan", "ㄖㄨㄢ") + strProcessed.selfReplace("sang", "ㄙㄤ") + strProcessed.selfReplace("seng", "ㄙㄥ") + strProcessed.selfReplace("shai", "ㄕㄞ") + strProcessed.selfReplace("shan", "ㄕㄢ") + strProcessed.selfReplace("shao", "ㄕㄠ") + strProcessed.selfReplace("shei", "ㄕㄟ") + strProcessed.selfReplace("shen", "ㄕㄣ") + strProcessed.selfReplace("shou", "ㄕㄡ") + strProcessed.selfReplace("shua", "ㄕㄨㄚ") + strProcessed.selfReplace("shui", "ㄕㄨㄟ") + strProcessed.selfReplace("shun", "ㄕㄨㄣ") + strProcessed.selfReplace("shuo", "ㄕㄨㄛ") + strProcessed.selfReplace("song", "ㄙㄨㄥ") + strProcessed.selfReplace("suan", "ㄙㄨㄢ") + strProcessed.selfReplace("tang", "ㄊㄤ") + strProcessed.selfReplace("teng", "ㄊㄥ") + strProcessed.selfReplace("tian", "ㄊㄧㄢ") + strProcessed.selfReplace("tiao", "ㄊㄧㄠ") + strProcessed.selfReplace("ting", "ㄊㄧㄥ") + strProcessed.selfReplace("tong", "ㄊㄨㄥ") + strProcessed.selfReplace("tuan", "ㄊㄨㄢ") + strProcessed.selfReplace("wang", "ㄨㄤ") + strProcessed.selfReplace("weng", "ㄨㄥ") + strProcessed.selfReplace("xian", "ㄒㄧㄢ") + strProcessed.selfReplace("xiao", "ㄒㄧㄠ") + 
strProcessed.selfReplace("xing", "ㄒㄧㄥ") + strProcessed.selfReplace("xuan", "ㄒㄩㄢ") + strProcessed.selfReplace("yang", "ㄧㄤ") + strProcessed.selfReplace("ying", "ㄧㄥ") + strProcessed.selfReplace("yong", "ㄩㄥ") + strProcessed.selfReplace("yuan", "ㄩㄢ") + strProcessed.selfReplace("zang", "ㄗㄤ") + strProcessed.selfReplace("zeng", "ㄗㄥ") + strProcessed.selfReplace("zhai", "ㄓㄞ") + strProcessed.selfReplace("zhan", "ㄓㄢ") + strProcessed.selfReplace("zhao", "ㄓㄠ") + strProcessed.selfReplace("zhei", "ㄓㄟ") + strProcessed.selfReplace("zhen", "ㄓㄣ") + strProcessed.selfReplace("zhou", "ㄓㄡ") + strProcessed.selfReplace("zhua", "ㄓㄨㄚ") + strProcessed.selfReplace("zhui", "ㄓㄨㄟ") + strProcessed.selfReplace("zhun", "ㄓㄨㄣ") + strProcessed.selfReplace("zhuo", "ㄓㄨㄛ") + strProcessed.selfReplace("zong", "ㄗㄨㄥ") + strProcessed.selfReplace("zuan", "ㄗㄨㄢ") + strProcessed.selfReplace("jun", "ㄐㄩㄣ") + strProcessed.selfReplace("ang", "ㄤ") + strProcessed.selfReplace("bai", "ㄅㄞ") + strProcessed.selfReplace("ban", "ㄅㄢ") + strProcessed.selfReplace("bao", "ㄅㄠ") + strProcessed.selfReplace("bei", "ㄅㄟ") + strProcessed.selfReplace("ben", "ㄅㄣ") + strProcessed.selfReplace("bie", "ㄅㄧㄝ") + strProcessed.selfReplace("bin", "ㄅㄧㄣ") + strProcessed.selfReplace("cai", "ㄘㄞ") + strProcessed.selfReplace("can", "ㄘㄢ") + strProcessed.selfReplace("cao", "ㄘㄠ") + strProcessed.selfReplace("cei", "ㄘㄟ") + strProcessed.selfReplace("cen", "ㄘㄣ") + strProcessed.selfReplace("cha", "ㄔㄚ") + strProcessed.selfReplace("che", "ㄔㄜ") + strProcessed.selfReplace("chi", "ㄔ") + strProcessed.selfReplace("chu", "ㄔㄨ") + strProcessed.selfReplace("cou", "ㄘㄡ") + strProcessed.selfReplace("cui", "ㄘㄨㄟ") + strProcessed.selfReplace("cun", "ㄘㄨㄣ") + strProcessed.selfReplace("cuo", "ㄘㄨㄛ") + strProcessed.selfReplace("dai", "ㄉㄞ") + strProcessed.selfReplace("dan", "ㄉㄢ") + strProcessed.selfReplace("dao", "ㄉㄠ") + strProcessed.selfReplace("dei", "ㄉㄟ") + strProcessed.selfReplace("den", "ㄉㄣ") + strProcessed.selfReplace("dia", "ㄉㄧㄚ") + strProcessed.selfReplace("die", "ㄉㄧㄝ") + 
strProcessed.selfReplace("diu", "ㄉㄧㄡ") + strProcessed.selfReplace("dou", "ㄉㄡ") + strProcessed.selfReplace("dui", "ㄉㄨㄟ") + strProcessed.selfReplace("dun", "ㄉㄨㄣ") + strProcessed.selfReplace("duo", "ㄉㄨㄛ") + strProcessed.selfReplace("eng", "ㄥ") + strProcessed.selfReplace("fan", "ㄈㄢ") + strProcessed.selfReplace("fei", "ㄈㄟ") + strProcessed.selfReplace("fen", "ㄈㄣ") + strProcessed.selfReplace("fou", "ㄈㄡ") + strProcessed.selfReplace("gai", "ㄍㄞ") + strProcessed.selfReplace("gan", "ㄍㄢ") + strProcessed.selfReplace("gao", "ㄍㄠ") + strProcessed.selfReplace("gei", "ㄍㄟ") + strProcessed.selfReplace("gin", "ㄍㄧㄣ") + strProcessed.selfReplace("gen", "ㄍㄣ") + strProcessed.selfReplace("gou", "ㄍㄡ") + strProcessed.selfReplace("gua", "ㄍㄨㄚ") + strProcessed.selfReplace("gue", "ㄍㄨㄜ") + strProcessed.selfReplace("gui", "ㄍㄨㄟ") + strProcessed.selfReplace("gun", "ㄍㄨㄣ") + strProcessed.selfReplace("guo", "ㄍㄨㄛ") + strProcessed.selfReplace("hai", "ㄏㄞ") + strProcessed.selfReplace("han", "ㄏㄢ") + strProcessed.selfReplace("hao", "ㄏㄠ") + strProcessed.selfReplace("hei", "ㄏㄟ") + strProcessed.selfReplace("hen", "ㄏㄣ") + strProcessed.selfReplace("hou", "ㄏㄡ") + strProcessed.selfReplace("hua", "ㄏㄨㄚ") + strProcessed.selfReplace("hui", "ㄏㄨㄟ") + strProcessed.selfReplace("hun", "ㄏㄨㄣ") + strProcessed.selfReplace("huo", "ㄏㄨㄛ") + strProcessed.selfReplace("jia", "ㄐㄧㄚ") + strProcessed.selfReplace("jie", "ㄐㄧㄝ") + strProcessed.selfReplace("jin", "ㄐㄧㄣ") + strProcessed.selfReplace("jiu", "ㄐㄧㄡ") + strProcessed.selfReplace("jue", "ㄐㄩㄝ") + strProcessed.selfReplace("kai", "ㄎㄞ") + strProcessed.selfReplace("kan", "ㄎㄢ") + strProcessed.selfReplace("kao", "ㄎㄠ") + strProcessed.selfReplace("ken", "ㄎㄣ") + strProcessed.selfReplace("kiu", "ㄎㄧㄡ") + strProcessed.selfReplace("kou", "ㄎㄡ") + strProcessed.selfReplace("kua", "ㄎㄨㄚ") + strProcessed.selfReplace("kui", "ㄎㄨㄟ") + strProcessed.selfReplace("kun", "ㄎㄨㄣ") + strProcessed.selfReplace("kuo", "ㄎㄨㄛ") + strProcessed.selfReplace("lai", "ㄌㄞ") + strProcessed.selfReplace("lan", "ㄌㄢ") + 
strProcessed.selfReplace("lao", "ㄌㄠ") + strProcessed.selfReplace("lei", "ㄌㄟ") + strProcessed.selfReplace("lia", "ㄌㄧㄚ") + strProcessed.selfReplace("lie", "ㄌㄧㄝ") + strProcessed.selfReplace("lin", "ㄌㄧㄣ") + strProcessed.selfReplace("liu", "ㄌㄧㄡ") + strProcessed.selfReplace("lou", "ㄌㄡ") + strProcessed.selfReplace("lun", "ㄌㄨㄣ") + strProcessed.selfReplace("luo", "ㄌㄨㄛ") + strProcessed.selfReplace("lve", "ㄌㄩㄝ") + strProcessed.selfReplace("mai", "ㄇㄞ") + strProcessed.selfReplace("man", "ㄇㄢ") + strProcessed.selfReplace("mao", "ㄇㄠ") + strProcessed.selfReplace("mei", "ㄇㄟ") + strProcessed.selfReplace("men", "ㄇㄣ") + strProcessed.selfReplace("mie", "ㄇㄧㄝ") + strProcessed.selfReplace("min", "ㄇㄧㄣ") + strProcessed.selfReplace("miu", "ㄇㄧㄡ") + strProcessed.selfReplace("mou", "ㄇㄡ") + strProcessed.selfReplace("nai", "ㄋㄞ") + strProcessed.selfReplace("nan", "ㄋㄢ") + strProcessed.selfReplace("nao", "ㄋㄠ") + strProcessed.selfReplace("nei", "ㄋㄟ") + strProcessed.selfReplace("nen", "ㄋㄣ") + strProcessed.selfReplace("nie", "ㄋㄧㄝ") + strProcessed.selfReplace("nin", "ㄋㄧㄣ") + strProcessed.selfReplace("niu", "ㄋㄧㄡ") + strProcessed.selfReplace("nou", "ㄋㄡ") + strProcessed.selfReplace("nui", "ㄋㄨㄟ") + strProcessed.selfReplace("nun", "ㄋㄨㄣ") + strProcessed.selfReplace("nuo", "ㄋㄨㄛ") + strProcessed.selfReplace("nve", "ㄋㄩㄝ") + strProcessed.selfReplace("pai", "ㄆㄞ") + strProcessed.selfReplace("pan", "ㄆㄢ") + strProcessed.selfReplace("pao", "ㄆㄠ") + strProcessed.selfReplace("pei", "ㄆㄟ") + strProcessed.selfReplace("pen", "ㄆㄣ") + strProcessed.selfReplace("pia", "ㄆㄧㄚ") + strProcessed.selfReplace("pie", "ㄆㄧㄝ") + strProcessed.selfReplace("pin", "ㄆㄧㄣ") + strProcessed.selfReplace("pou", "ㄆㄡ") + strProcessed.selfReplace("qia", "ㄑㄧㄚ") + strProcessed.selfReplace("qie", "ㄑㄧㄝ") + strProcessed.selfReplace("qin", "ㄑㄧㄣ") + strProcessed.selfReplace("qiu", "ㄑㄧㄡ") + strProcessed.selfReplace("que", "ㄑㄩㄝ") + strProcessed.selfReplace("qun", "ㄑㄩㄣ") + strProcessed.selfReplace("ran", "ㄖㄢ") + strProcessed.selfReplace("rao", "ㄖㄠ") + 
strProcessed.selfReplace("ren", "ㄖㄣ") + strProcessed.selfReplace("rou", "ㄖㄡ") + strProcessed.selfReplace("rui", "ㄖㄨㄟ") + strProcessed.selfReplace("run", "ㄖㄨㄣ") + strProcessed.selfReplace("ruo", "ㄖㄨㄛ") + strProcessed.selfReplace("sai", "ㄙㄞ") + strProcessed.selfReplace("san", "ㄙㄢ") + strProcessed.selfReplace("sao", "ㄙㄠ") + strProcessed.selfReplace("sei", "ㄙㄟ") + strProcessed.selfReplace("sen", "ㄙㄣ") + strProcessed.selfReplace("sha", "ㄕㄚ") + strProcessed.selfReplace("she", "ㄕㄜ") + strProcessed.selfReplace("shi", "ㄕ") + strProcessed.selfReplace("shu", "ㄕㄨ") + strProcessed.selfReplace("sou", "ㄙㄡ") + strProcessed.selfReplace("sui", "ㄙㄨㄟ") + strProcessed.selfReplace("sun", "ㄙㄨㄣ") + strProcessed.selfReplace("suo", "ㄙㄨㄛ") + strProcessed.selfReplace("tai", "ㄊㄞ") + strProcessed.selfReplace("tan", "ㄊㄢ") + strProcessed.selfReplace("tao", "ㄊㄠ") + strProcessed.selfReplace("tie", "ㄊㄧㄝ") + strProcessed.selfReplace("tou", "ㄊㄡ") + strProcessed.selfReplace("tui", "ㄊㄨㄟ") + strProcessed.selfReplace("tun", "ㄊㄨㄣ") + strProcessed.selfReplace("tuo", "ㄊㄨㄛ") + strProcessed.selfReplace("wai", "ㄨㄞ") + strProcessed.selfReplace("wan", "ㄨㄢ") + strProcessed.selfReplace("wei", "ㄨㄟ") + strProcessed.selfReplace("wen", "ㄨㄣ") + strProcessed.selfReplace("xia", "ㄒㄧㄚ") + strProcessed.selfReplace("xie", "ㄒㄧㄝ") + strProcessed.selfReplace("xin", "ㄒㄧㄣ") + strProcessed.selfReplace("xiu", "ㄒㄧㄡ") + strProcessed.selfReplace("xue", "ㄒㄩㄝ") + strProcessed.selfReplace("xun", "ㄒㄩㄣ") + strProcessed.selfReplace("yai", "ㄧㄞ") + strProcessed.selfReplace("yan", "ㄧㄢ") + strProcessed.selfReplace("yao", "ㄧㄠ") + strProcessed.selfReplace("yin", "ㄧㄣ") + strProcessed.selfReplace("you", "ㄧㄡ") + strProcessed.selfReplace("yue", "ㄩㄝ") + strProcessed.selfReplace("yun", "ㄩㄣ") + strProcessed.selfReplace("zai", "ㄗㄞ") + strProcessed.selfReplace("zan", "ㄗㄢ") + strProcessed.selfReplace("zao", "ㄗㄠ") + strProcessed.selfReplace("zei", "ㄗㄟ") + strProcessed.selfReplace("zen", "ㄗㄣ") + strProcessed.selfReplace("zha", "ㄓㄚ") + 
strProcessed.selfReplace("zhe", "ㄓㄜ") + strProcessed.selfReplace("zhi", "ㄓ") + strProcessed.selfReplace("zhu", "ㄓㄨ") + strProcessed.selfReplace("zou", "ㄗㄡ") + strProcessed.selfReplace("zui", "ㄗㄨㄟ") + strProcessed.selfReplace("zun", "ㄗㄨㄣ") + strProcessed.selfReplace("zuo", "ㄗㄨㄛ") + strProcessed.selfReplace("ai", "ㄞ") + strProcessed.selfReplace("an", "ㄢ") + strProcessed.selfReplace("ao", "ㄠ") + strProcessed.selfReplace("ba", "ㄅㄚ") + strProcessed.selfReplace("bi", "ㄅㄧ") + strProcessed.selfReplace("bo", "ㄅㄛ") + strProcessed.selfReplace("bu", "ㄅㄨ") + strProcessed.selfReplace("ca", "ㄘㄚ") + strProcessed.selfReplace("ce", "ㄘㄜ") + strProcessed.selfReplace("ci", "ㄘ") + strProcessed.selfReplace("cu", "ㄘㄨ") + strProcessed.selfReplace("da", "ㄉㄚ") + strProcessed.selfReplace("de", "ㄉㄜ") + strProcessed.selfReplace("di", "ㄉㄧ") + strProcessed.selfReplace("du", "ㄉㄨ") + strProcessed.selfReplace("eh", "ㄝ") + strProcessed.selfReplace("ei", "ㄟ") + strProcessed.selfReplace("en", "ㄣ") + strProcessed.selfReplace("er", "ㄦ") + strProcessed.selfReplace("fa", "ㄈㄚ") + strProcessed.selfReplace("fo", "ㄈㄛ") + strProcessed.selfReplace("fu", "ㄈㄨ") + strProcessed.selfReplace("ga", "ㄍㄚ") + strProcessed.selfReplace("ge", "ㄍㄜ") + strProcessed.selfReplace("gi", "ㄍㄧ") + strProcessed.selfReplace("gu", "ㄍㄨ") + strProcessed.selfReplace("ha", "ㄏㄚ") + strProcessed.selfReplace("he", "ㄏㄜ") + strProcessed.selfReplace("hu", "ㄏㄨ") + strProcessed.selfReplace("ji", "ㄐㄧ") + strProcessed.selfReplace("ju", "ㄐㄩ") + strProcessed.selfReplace("ka", "ㄎㄚ") + strProcessed.selfReplace("ke", "ㄎㄜ") + strProcessed.selfReplace("ku", "ㄎㄨ") + strProcessed.selfReplace("la", "ㄌㄚ") + strProcessed.selfReplace("le", "ㄌㄜ") + strProcessed.selfReplace("li", "ㄌㄧ") + strProcessed.selfReplace("lo", "ㄌㄛ") + strProcessed.selfReplace("lu", "ㄌㄨ") + strProcessed.selfReplace("lv", "ㄌㄩ") + strProcessed.selfReplace("ma", "ㄇㄚ") + strProcessed.selfReplace("me", "ㄇㄜ") + strProcessed.selfReplace("mi", "ㄇㄧ") + strProcessed.selfReplace("mo", "ㄇㄛ") + 
strProcessed.selfReplace("mu", "ㄇㄨ") + strProcessed.selfReplace("na", "ㄋㄚ") + strProcessed.selfReplace("ne", "ㄋㄜ") + strProcessed.selfReplace("ni", "ㄋㄧ") + strProcessed.selfReplace("nu", "ㄋㄨ") + strProcessed.selfReplace("nv", "ㄋㄩ") + strProcessed.selfReplace("ou", "ㄡ") + strProcessed.selfReplace("pa", "ㄆㄚ") + strProcessed.selfReplace("pi", "ㄆㄧ") + strProcessed.selfReplace("po", "ㄆㄛ") + strProcessed.selfReplace("pu", "ㄆㄨ") + strProcessed.selfReplace("qi", "ㄑㄧ") + strProcessed.selfReplace("qu", "ㄑㄩ") + strProcessed.selfReplace("re", "ㄖㄜ") + strProcessed.selfReplace("ri", "ㄖ") + strProcessed.selfReplace("ru", "ㄖㄨ") + strProcessed.selfReplace("sa", "ㄙㄚ") + strProcessed.selfReplace("se", "ㄙㄜ") + strProcessed.selfReplace("si", "ㄙ") + strProcessed.selfReplace("su", "ㄙㄨ") + strProcessed.selfReplace("ta", "ㄊㄚ") + strProcessed.selfReplace("te", "ㄊㄜ") + strProcessed.selfReplace("ti", "ㄊㄧ") + strProcessed.selfReplace("tu", "ㄊㄨ") + strProcessed.selfReplace("wa", "ㄨㄚ") + strProcessed.selfReplace("wo", "ㄨㄛ") + strProcessed.selfReplace("wu", "ㄨ") + strProcessed.selfReplace("xi", "ㄒㄧ") + strProcessed.selfReplace("xu", "ㄒㄩ") + strProcessed.selfReplace("ya", "ㄧㄚ") + strProcessed.selfReplace("ye", "ㄧㄝ") + strProcessed.selfReplace("yi", "ㄧ") + strProcessed.selfReplace("yo", "ㄧㄛ") + strProcessed.selfReplace("yu", "ㄩ") + strProcessed.selfReplace("za", "ㄗㄚ") + strProcessed.selfReplace("ze", "ㄗㄜ") + strProcessed.selfReplace("zi", "ㄗ") + strProcessed.selfReplace("zu", "ㄗㄨ") + strProcessed.selfReplace("a", "ㄚ") + strProcessed.selfReplace("e", "ㄜ") + strProcessed.selfReplace("o", "ㄛ") + strProcessed.selfReplace("q", "ㄑ") + strProcessed.selfReplace("2", "ˊ") + strProcessed.selfReplace("3", "ˇ") + strProcessed.selfReplace("4", "ˋ") + strProcessed.selfReplace("5", "˙") + strProcessed.selfReplace("1", "") } // Step 3: Add Formatted Pragma, the Sorted Header: @@ -509,14 +501,10 @@ extension String { strProcessed = hdrFormatted + strProcessed // Add Sorted Header // Step 4: Deduplication. 
- arrData = strProcessed.components(separatedBy: "\n") - strProcessed = "" // Reset its value + let arrData = strProcessed.split(separator: "\n") // 下面兩行的 reversed 是首尾顛倒,免得破壞最新的 override 資訊。 let arrDataDeduplicated = Array(NSOrderedSet(array: arrData.reversed()).array as! [String]) - for lineData in arrDataDeduplicated.reversed() { - strProcessed += lineData - strProcessed += "\n" - } + strProcessed = arrDataDeduplicated.reversed().joined(separator: "\n") + "\n" // Step 5: Remove duplicated newlines at the end of the file. strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n") From 9682b8ea5da5483f2419fb85b45633b3de6a9463 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 18:55:37 +0800 Subject: [PATCH 6/9] LMConsolidator // Optimize data handling process. --- Source/Modules/LangModelRelated/LMConsolidator.swift | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Source/Modules/LangModelRelated/LMConsolidator.swift b/Source/Modules/LangModelRelated/LMConsolidator.swift index 30f173dc..b9392b4f 100644 --- a/Source/Modules/LangModelRelated/LMConsolidator.swift +++ b/Source/Modules/LangModelRelated/LMConsolidator.swift @@ -120,14 +120,10 @@ extension vChewing { } // Step 4: Deduplication. - let arrData = strProcessed.components(separatedBy: "\n") - strProcessed = "" // Reset its value + let arrData = strProcessed.split(separator: "\n") // 下面兩行的 reversed 是首尾顛倒,免得破壞最新的 override 資訊。 let arrDataDeduplicated = Array(NSOrderedSet(array: arrData.reversed()).array as! [String]) - for lineData in arrDataDeduplicated.reversed() { - strProcessed += lineData - strProcessed += "\n" - } + strProcessed = arrDataDeduplicated.reversed().joined(separator: "\n") + "\n" // Step 5: Remove duplicated newlines at the end of the file. 
strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n") From 1ea40833386562ed5f4d5cee95a25a882f72637d Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 10 May 2022 19:00:50 +0800 Subject: [PATCH 7/9] LMInstantiator // Use NSOrderedSet to deduplicate. --- .../LangModelRelated/LMInstantiator.swift | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 529a11e3..c9312cf4 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -236,9 +236,8 @@ extension vChewing { rawAllUnigrams += lmSymbols.unigramsFor(key: key) } - // 準備過濾清單與統計清單 - var insertedPairs: Set = [] // 統計清單 - var filteredPairs: Set = [] // 過濾清單 + // 準備過濾清單。因為我們在 Swift 使用 NSOrderedSet,所以就不需要統計清單了。 + var filteredPairs: Set = [] // 載入要過濾的 KeyValuePair 清單。 for unigram in lmFiltered.unigramsFor(key: key) { @@ -247,7 +246,7 @@ extension vChewing { return filterAndTransform( unigrams: rawAllUnigrams, - filter: filteredPairs, inserted: &insertedPairs + filter: filteredPairs ) } @@ -275,8 +274,7 @@ extension vChewing { func filterAndTransform( unigrams: [Megrez.Unigram], - filter filteredPairs: Set, - inserted insertedPairs: inout Set + filter filteredPairs: Set ) -> [Megrez.Unigram] { var results: [Megrez.Unigram] = [] @@ -293,13 +291,12 @@ extension vChewing { pair.value = replacement } } - - if !insertedPairs.contains(pair) { - results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) - insertedPairs.insert(pair) - } + results.append(Megrez.Unigram(keyValue: pair, score: unigram.score)) } - return results + // Swift 不見得非得用 Swift-Collections 才可以用 OrderedSet,還有 NSOrderedSet 可用來去重複。 + let resultsDeduplicated = Array(NSOrderedSet(array: results).array as! 
[Megrez.Unigram]) + + return resultsDeduplicated } } } From 9e6badb64c9e054a93a82db8a56fdf8f886b288c Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 11 May 2022 12:54:45 +0800 Subject: [PATCH 8/9] Update Data - 20220511 --- Source/Data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Data b/Source/Data index 4f922087..f49547bf 160000 --- a/Source/Data +++ b/Source/Data @@ -1 +1 @@ -Subproject commit 4f922087d6c20964a59f7838e05ae82beef493d1 +Subproject commit f49547bff120f4a800d1930384a428890cf13f9d From 5eeb11e1761c03c0b2219528aad450dc2c25e8c8 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Wed, 11 May 2022 12:55:10 +0800 Subject: [PATCH 9/9] Bump version to 1.5.6 Build 1956. --- Update-Info.plist | 4 ++-- vChewing.pkgproj | 2 +- vChewing.xcodeproj/project.pbxproj | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Update-Info.plist b/Update-Info.plist index 871e0c7e..3b5600ae 100644 --- a/Update-Info.plist +++ b/Update-Info.plist @@ -3,9 +3,9 @@ CFBundleShortVersionString - 1.5.5 + 1.5.6 CFBundleVersion - 1955 + 1956 UpdateInfoEndpoint https://gitee.com/vchewing/vChewing-macOS/raw/main/Update-Info.plist UpdateInfoSite diff --git a/vChewing.pkgproj b/vChewing.pkgproj index 535324c3..77218f6c 100644 --- a/vChewing.pkgproj +++ b/vChewing.pkgproj @@ -726,7 +726,7 @@ USE_HFS+_COMPRESSION VERSION - 1.5.5 + 1.5.6 TYPE 0 diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 63324db9..f8bb64b8 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -1332,7 +1332,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEBUG_INFORMATION_FORMAT = dwarf; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; @@ -1355,7 +1355,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + 
MARKETING_VERSION = 1.5.6; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1388,7 +1388,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; GCC_C_LANGUAGE_STANDARD = gnu11; @@ -1407,7 +1407,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + MARKETING_VERSION = 1.5.6; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1522,7 +1522,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEVELOPMENT_ASSET_PATHS = ""; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -1557,7 +1557,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + MARKETING_VERSION = 1.5.6; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1589,7 +1589,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_ASSET_PATHS = ""; DEVELOPMENT_TEAM = ""; @@ -1619,7 +1619,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + MARKETING_VERSION = 1.5.6; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; @@ -1702,7 +1702,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEVELOPMENT_TEAM = ""; 
GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; @@ -1727,7 +1727,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + MARKETING_VERSION = 1.5.6; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1754,7 +1754,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1955; + CURRENT_PROJECT_VERSION = 1956; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -1774,7 +1774,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.5; + MARKETING_VERSION = 1.5.6; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = "";