LMI // Rewrite documentation in Chinese.

2022-06-18 14:45:12 +08:00 · 2022-06-18 14:45:12 +08:00 · 2a97a6a80a
parent 6d4adea6ed
commit 2a97a6a80a
1 changed file with 25 additions and 27 deletions
--- a/Source/Modules/LangModelRelated/LMInstantiator.swift
+++ b/Source/Modules/LangModelRelated/LMInstantiator.swift
@@ -24,9 +24,6 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

-// NOTE: We still keep some of the comments left by Zonble,
-// regardless that he is not in charge of this Swift module。
-
 import Foundation

 // 簡體中文模式與繁體中文模式共用全字庫擴展模組，故單獨處理。
@@ -39,27 +36,23 @@ private var lmSymbols = vChewing.LMCoreNS(
 )

 extension vChewing {
-  /// LMInstantiator is a facade for managing a set of models including
-  /// the input method language model, user phrases and excluded phrases.
+  /// 語言模組副本化模組（LMInstantiator，下稱「LMI」）自身為符合天權星組字引擎內
+  /// 的 LanguageModel 協定的模組、統籌且整理來自其它子模組的資料（包括使用者語彙、
+  /// 繪文字模組、語彙濾除表、原廠語言模組等）。
  ///
-  /// It is the primary model class that the input controller and input compositor
-  /// of vChewing talks to. When the input compositor starts to build a sentence
-  /// from a series of BPMF readings, it passes the readings to the model to see
-  /// if there are valid unigrams, and use returned unigrams to produce the final
-  /// results.
+  /// LMI 型別為與輸入法按鍵調度模組直接溝通之唯一語言模組。當組字器開始根據給定的
+  /// 讀音鏈構築語句時，LMI 會接收來自組字器的讀音、輪流檢查自身是否有可以匹配到的
+  /// 單元圖結果，然後將結果整理為陣列、再回饋給組字器。
  ///
-  /// LMInstantiator combine and transform the unigrams from the primary language
-  /// model and user phrases. The process is
+  /// LMI 還會在將單元圖結果整理成陣列時做出下述處理轉換步驟：
  ///
-  /// 1) Get the original unigrams.
-  /// 2) Drop the unigrams whose value is contained in the exclusion map.
-  /// 3) Replace the values of the unigrams using the phrase replacement map.
-  /// 4) Drop the duplicated phrases from the generated unigram array.
+  /// 1. 獲取原始結果陣列。
+  /// 2. 如果有原始結果也出現在濾除表當中的話，則自結果陣列丟棄這類結果。
+  /// 3. 如果啟用了語彙置換的話，則對目前經過處理的結果陣列套用語彙置換。
+  /// 4. 擁有相同讀音與詞語資料值的單元圖只會留下權重最大的那一筆，其餘重複值會被丟棄。
  ///
-  /// The controller can ask the model to load the primary input method language
-  /// model while launching and to load the user phrases anytime if the custom
-  /// files are modified. It does not keep the reference of the data pathes but
-  /// you have to pass the paths when you ask it to load.
+  /// LMI 會根據需要分別載入原廠語言模組和其他個別的子語言模組。LMI 本身不會記錄這些
+  /// 語言模組的相關資料的存放位置，僅藉由參數來讀取相關訊息。
  public class LMInstantiator: Megrez.LanguageModel {
    // 在函式內部用以記錄狀態的開關。
    public var isPhraseReplacementEnabled = false
@@ -76,9 +69,9 @@ extension vChewing {
    /// 但是，LMCoreEX 對 2010-2013 年等舊 mac 機種而言，讀取速度異常緩慢。
    /// 於是 LMCoreNS 就出場了，專門用來讀取原廠的 plist 格式的辭典。

-    // 聲明原廠語言模組
-    /// Reverse 的話，第一欄是注音，第二欄是對應的漢字，第三欄是可能的權重。
-    /// 不 Reverse 的話，第一欄是漢字，第二欄是對應的注音，第三欄是可能的權重。
+    // 聲明原廠語言模組：
+    // Reverse 的話，第一欄是注音，第二欄是對應的漢字，第三欄是可能的權重。
+    // 不 Reverse 的話，第一欄是漢字，第二欄是對應的注音，第三欄是可能的權重。
    var lmCore = LMCoreNS(
      reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false
    )
@@ -194,12 +187,12 @@ extension vChewing {

    // MARK: - Core Functions (Public)

-    /// Not implemented since we do not have data to provide bigram function.
+    /// 威注音輸入法目前尚未具備對雙元圖的處理能力，故停用該函式。
    // public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { }

-    /// Returns a list of available unigram for the given key.
-    /// @param key:String represents the BPMF reading or a symbol key.
-    /// For instance, it you pass "ㄉㄨㄟˇ", it returns "㨃" and other possible candidates.
+    /// 給定讀音字串，讓 LMI 給出對應的經過處理的單元圖陣列。
+    /// - Parameter key: 給定的讀音字串。
+    /// - Returns: 對應的經過處理的單元圖陣列。
    override open func unigramsFor(key: String) -> [Megrez.Unigram] {
      if key == " " {
        /// 給空格鍵指定輸出值。
@@ -267,6 +260,11 @@ extension vChewing {

    // MARK: - Core Functions (Private)

+    /// 給定單元圖原始結果陣列，經過語彙過濾處理＋置換處理＋去重複處理之後，給出單元圖結果陣列。
+    /// - Parameters:
+    ///   - unigrams: 傳入的單元圖原始結果陣列
+    ///   - filteredPairs: 傳入的要過濾掉的鍵值配對陣列
+    /// - Returns: 經過語彙過濾處理＋置換處理＋去重複處理的單元圖結果陣列
    func filterAndTransform(
      unigrams: [Megrez.Unigram],
      filter filteredPairs: Set<Megrez.KeyValuePair>