From 2a97a6a80a3e17a986a4e6d887b55848e3781e26 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 18 Jun 2022 14:45:12 +0800 Subject: [PATCH] LMI // Rewrite documentation in Chinese. --- .../LangModelRelated/LMInstantiator.swift | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 8324ac22..6ea47680 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -24,9 +24,6 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -// NOTE: We still keep some of the comments left by Zonble, -// regardless that he is not in charge of this Swift module。 - import Foundation // 簡體中文模式與繁體中文模式共用全字庫擴展模組,故單獨處理。 @@ -39,27 +36,23 @@ private var lmSymbols = vChewing.LMCoreNS( ) extension vChewing { - /// LMInstantiator is a facade for managing a set of models including - /// the input method language model, user phrases and excluded phrases. + /// 語言模組副本化模組(LMInstantiator,下稱「LMI」)自身為符合天權星組字引擎內 + /// 的 LanguageModel 協定的模組、統籌且整理來自其它子模組的資料(包括使用者語彙、 + /// 繪文字模組、語彙濾除表、原廠語言模組等)。 /// - /// It is the primary model class that the input controller and input compositor - /// of vChewing talks to. When the input compositor starts to build a sentence - /// from a series of BPMF readings, it passes the readings to the model to see - /// if there are valid unigrams, and use returned unigrams to produce the final - /// results. + /// LMI 型別為與輸入法按鍵調度模組直接溝通之唯一語言模組。當組字器開始根據給定的 + /// 讀音鏈構築語句時,LMI 會接收來自組字器的讀音、輪流檢查自身是否有可以匹配到的 + /// 單元圖結果,然後將結果整理為陣列、再回饋給組字器。 /// - /// LMInstantiator combine and transform the unigrams from the primary language - /// model and user phrases. The process is + /// LMI 還會在將單元圖結果整理成陣列時做出下述處理轉換步驟: /// - /// 1) Get the original unigrams. - /// 2) Drop the unigrams whose value is contained in the exclusion map. - /// 3) Replace the values of the unigrams using the phrase replacement map. - /// 4) Drop the duplicated phrases from the generated unigram array. + /// 1. 獲取原始結果陣列。 + /// 2. 如果有原始結果也出現在濾除表當中的話,則自結果陣列丟棄這類結果。 + /// 3. 如果啟用了語彙置換的話,則對目前經過處理的結果陣列套用語彙置換。 + /// 4. 擁有相同讀音與詞語資料值的單元圖只會留下權重最大的那一筆,其餘重複值會被丟棄。 /// - /// The controller can ask the model to load the primary input method language - /// model while launching and to load the user phrases anytime if the custom - /// files are modified. It does not keep the reference of the data pathes but - /// you have to pass the paths when you ask it to load. + /// LMI 會根據需要分別載入原廠語言模組和其他個別的子語言模組。LMI 本身不會記錄這些 + /// 語言模組的相關資料的存放位置,僅藉由參數來讀取相關訊息。 public class LMInstantiator: Megrez.LanguageModel { // 在函式內部用以記錄狀態的開關。 public var isPhraseReplacementEnabled = false @@ -76,9 +69,9 @@ extension vChewing { /// 但是,LMCoreEX 對 2010-2013 年等舊 mac 機種而言,讀取速度異常緩慢。 /// 於是 LMCoreNS 就出場了,專門用來讀取原廠的 plist 格式的辭典。 - // 聲明原廠語言模組 - /// Reverse 的話,第一欄是注音,第二欄是對應的漢字,第三欄是可能的權重。 - /// 不 Reverse 的話,第一欄是漢字,第二欄是對應的注音,第三欄是可能的權重。 + // 聲明原廠語言模組: + // Reverse 的話,第一欄是注音,第二欄是對應的漢字,第三欄是可能的權重。 + // 不 Reverse 的話,第一欄是漢字,第二欄是對應的注音,第三欄是可能的權重。 var lmCore = LMCoreNS( reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false ) @@ -194,12 +187,12 @@ extension vChewing { // MARK: - Core Functions (Public) - /// Not implemented since we do not have data to provide bigram function. + /// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。 // public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { } - /// Returns a list of available unigram for the given key. - /// @param key:String represents the BPMF reading or a symbol key. - /// For instance, it you pass "ㄉㄨㄟˇ", it returns "㨃" and other possible candidates. + /// 給定讀音字串,讓 LMI 給出對應的經過處理的單元圖陣列。 + /// - Parameter key: 給定的讀音字串。 + /// - Returns: 對應的經過處理的單元圖陣列。 override open func unigramsFor(key: String) -> [Megrez.Unigram] { if key == " " { /// 給空格鍵指定輸出值。 @@ -267,6 +260,11 @@ extension vChewing { // MARK: - Core Functions (Private) + /// 給定單元圖原始結果陣列,經過語彙過濾處理+置換處理+去重複處理之後,給出單元圖結果陣列。 + /// - Parameters: + /// - unigrams: 傳入的單元圖原始結果陣列 + /// - filteredPairs: 傳入的要過濾掉的鍵值配對陣列 + /// - Returns: 經過語彙過濾處理+置換處理+去重複處理的單元圖結果陣列 func filterAndTransform( unigrams: [Megrez.Unigram], filter filteredPairs: Set