From fa3ba1c893432212159ab8a2bf2773bfb4bf7cdf Mon Sep 17 00:00:00 2001
From: ShikiSuen <shikisuen@outlook.com>
Date: Sat, 16 Jul 2022 16:41:51 +0800
Subject: [PATCH] dataCompiler // Add health and duplication check.

---
 DataCompiler/dataCompiler.swift | 417 +++++++++++++++++++++++++++-----
 1 file changed, 352 insertions(+), 65 deletions(-)

diff --git a/DataCompiler/dataCompiler.swift b/DataCompiler/dataCompiler.swift
index f4dfd21e..a20e0695 100644
--- a/DataCompiler/dataCompiler.swift
+++ b/DataCompiler/dataCompiler.swift
@@ -60,9 +60,9 @@ extension String {
 // MARK: - 引入小數點位數控制函式
 
 // Ref: https://stackoverflow.com/a/32581409/4162914
-extension Float {
-  fileprivate func rounded(toPlaces places: Int) -> Float {
-    let divisor = pow(10.0, Float(places))
+extension Double {
+  fileprivate func rounded(toPlaces places: Int) -> Double {  // Rounds to `places` decimal places.
+    let divisor = pow(10.0, Double(places))  // 10^places: scales the wanted digits in front of the point.
     return (self * divisor).rounded() / divisor
   }
 }
@@ -81,17 +81,16 @@ func ** (_ base: Double, _ exp: Double) -> Double {
   pow(base, exp)
 }
 
-func ** (_ base: Float, _ exp: Float) -> Float {
-  pow(base, exp)
-}
-
 // MARK: - 定義檔案結構
 
-struct Entry {
-  var valPhone: String = ""
-  var valPhrase: String = ""
-  var valWeight: Float = -1.0
-  var valCount: Int = 0
+struct Unigram: CustomStringConvertible {  // One lexicon record (replaces the former `Entry`).
+  var key: String = ""  // Reading (was `valPhone`).
+  var value: String = ""  // Phrase text (was `valPhrase`).
+  var score: Double = -1.0  // Weight (was `valWeight`).
+  var count: Int = 0  // Occurrence count (was `valCount`).
+  var description: String {  // Renders "(key, value, score)"; `count` is not included.
+    "(\(key), \(value), \(score))"
+  }
 }
 
 // MARK: - 注音加密，減少 plist 體積
@@ -105,8 +104,8 @@ func cnvPhonabetToASCII(_ incoming: String) -> String {
   ]
   var strOutput = incoming
   if !strOutput.contains("_") {
-    for entry in dicPhonabet2ASCII {
-      strOutput = strOutput.replacingOccurrences(of: entry.key, with: entry.value)
+    for pair in dicPhonabet2ASCII {  // Replace every table key found in the text with its mapped value.
+      strOutput = strOutput.replacingOccurrences(of: pair.key, with: pair.value)
     }
   }
   return strOutput
@@ -146,8 +145,8 @@ private let urlPlistCHT: String = "./data-cht.plist"
 
 // MARK: - 載入詞組檔案且輸出陣列
 
-func rawDictForPhrases(isCHS: Bool) -> [Entry] {
-  var arrEntryRAW: [Entry] = []
+func rawDictForPhrases(isCHS: Bool) -> [Unigram] {
+  var arrUnigramRAW: [Unigram] = []
   var strRAW = ""
   let urlCustom: String = isCHS ? urlCHSforCustom : urlCHTforCustom
   let urlTABE: String = isCHS ? urlCHSforTABE : urlCHTforTABE
@@ -195,7 +194,7 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
         varLineDataProcessed += currentCell
       }
     }
-    // 然後直接乾脆就轉成 Entry 吧。
+    // 然後直接乾脆就轉成 Unigram 吧。
     let arrCells: [String] = varLineDataProcessed.components(separatedBy: "\t")
     count = 0  // 不需要再定義，因為之前已經有定義過了。
     var phone = ""
@@ -211,22 +210,22 @@ func rawDictForPhrases(isCHS: Bool) -> [Entry] {
       }
     }
     if phrase != "" {  // 廢掉空數據；之後無須再這樣處理。
-      arrEntryRAW += [
-        Entry(
-          valPhone: phone, valPhrase: phrase, valWeight: 0.0,
-          valCount: occurrence
+      arrUnigramRAW += [
+        Unigram(
+          key: phone, value: phrase, score: 0.0,
+          count: occurrence
         )
       ]
     }
   }
   NSLog(" - \(i18n): 成功生成詞語語料辭典（權重待計算）。")
-  return arrEntryRAW
+  return arrUnigramRAW
 }
 
 // MARK: - 載入單字檔案且輸出陣列
 
-func rawDictForKanjis(isCHS: Bool) -> [Entry] {
-  var arrEntryRAW: [Entry] = []
+func rawDictForKanjis(isCHS: Bool) -> [Unigram] {
+  var arrUnigramRAW: [Unigram] = []
   var strRAW = ""
   let i18n: String = isCHS ? "簡體中文" : "繁體中文"
   // 讀取內容
@@ -272,7 +271,7 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
         varLineDataProcessed += currentCell
       }
     }
-    // 然後直接乾脆就轉成 Entry 吧。
+    // 然後直接乾脆就轉成 Unigram 吧。
     let arrCells: [String] = varLineDataProcessed.components(separatedBy: "\t")
     count = 0  // 不需要再定義，因為之前已經有定義過了。
     var phone = ""
@@ -288,22 +287,22 @@ func rawDictForKanjis(isCHS: Bool) -> [Entry] {
       }
     }
     if phrase != "" {  // 廢掉空數據；之後無須再這樣處理。
-      arrEntryRAW += [
-        Entry(
-          valPhone: phone, valPhrase: phrase, valWeight: 0.0,
-          valCount: occurrence
+      arrUnigramRAW += [
+        Unigram(
+          key: phone, value: phrase, score: 0.0,
+          count: occurrence
         )
       ]
     }
   }
   NSLog(" - \(i18n): 成功生成單字語料辭典（權重待計算）。")
-  return arrEntryRAW
+  return arrUnigramRAW
 }
 
 // MARK: - 載入非漢字檔案且輸出陣列
 
-func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
-  var arrEntryRAW: [Entry] = []
+func rawDictForNonKanjis(isCHS: Bool) -> [Unigram] {
+  var arrUnigramRAW: [Unigram] = []
   var strRAW = ""
   let i18n: String = isCHS ? "簡體中文" : "繁體中文"
   // 讀取內容
@@ -347,7 +346,7 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
         varLineDataProcessed += currentCell
       }
     }
-    // 然後直接乾脆就轉成 Entry 吧。
+    // 然後直接乾脆就轉成 Unigram 吧。
     let arrCells: [String] = varLineDataProcessed.components(separatedBy: "\t")
     count = 0  // 不需要再定義，因為之前已經有定義過了。
     var phone = ""
@@ -363,60 +362,60 @@ func rawDictForNonKanjis(isCHS: Bool) -> [Entry] {
       }
     }
     if phrase != "" {  // 廢掉空數據；之後無須再這樣處理。
-      arrEntryRAW += [
-        Entry(
-          valPhone: phone, valPhrase: phrase, valWeight: 0.0,
-          valCount: occurrence
+      arrUnigramRAW += [
+        Unigram(
+          key: phone, value: phrase, score: 0.0,
+          count: occurrence
         )
       ]
     }
   }
   NSLog(" - \(i18n): 成功生成非漢字語料辭典（權重待計算）。")
-  return arrEntryRAW
+  return arrUnigramRAW
 }
 
-func weightAndSort(_ arrStructUncalculated: [Entry], isCHS: Bool) -> [Entry] {
+func weightAndSort(_ arrStructUncalculated: [Unigram], isCHS: Bool) -> [Unigram] {
   let i18n: String = isCHS ? "簡體中文" : "繁體中文"
-  var arrStructCalculated: [Entry] = []
-  let fscale: Float = 2.7
-  var norm: Float = 0.0
-  for entry in arrStructUncalculated {
-    if entry.valCount >= 0 {
-      norm += fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0)
-        * Float(entry.valCount)
+  var arrStructCalculated: [Unigram] = []
+  let fscale = 2.7
+  var norm = 0.0
+  for unigram in arrStructUncalculated {
+    if unigram.count >= 0 {
+      norm += fscale ** (Double(unigram.value.count) / 3.0 - 1.0)
+        * Double(unigram.count)
     }
   }
   // norm 計算完畢，開始將 norm 作為新的固定常數來為每個詞條記錄計算權重。
   // 將新酷音的詞語出現次數數據轉換成小麥引擎可讀的數據形式。
   // 對出現次數小於 1 的詞條，將 0 當成 0.5 來處理、以防止除零。
-  for entry in arrStructUncalculated {
-    var weight: Float = 0
-    switch entry.valCount {
+  for unigram in arrStructUncalculated {
+    var weight: Double = 0
+    switch unigram.count {
       case -2:  // 拗音假名
         weight = -13
       case -1:  // 單個假名
         weight = -13
       case 0:  // 墊底低頻漢字與詞語
         weight = log10(
-          fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0) * 0.25 / norm)
+          fscale ** (Double(unigram.value.count) / 3.0 - 1.0) * 0.25 / norm)
       default:
         weight = log10(
-          fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0)
-            * Float(entry.valCount) / norm)  // Credit: MJHsieh.
+          fscale ** (Double(unigram.value.count) / 3.0 - 1.0)
+            * Double(unigram.count) / norm)  // Credit: MJHsieh.
     }
-    let weightRounded: Float = weight.rounded(toPlaces: 3)  // 為了節省生成的檔案體積，僅保留小數點後三位。
+    let weightRounded: Double = weight.rounded(toPlaces: 3)  // 為了節省生成的檔案體積，僅保留小數點後三位。
     arrStructCalculated += [
-      Entry(
-        valPhone: entry.valPhone, valPhrase: entry.valPhrase, valWeight: weightRounded,
-        valCount: entry.valCount
+      Unigram(
+        key: unigram.key, value: unigram.value, score: weightRounded,
+        count: unigram.count
       )
     ]
   }
   NSLog(" - \(i18n): 成功計算權重。")
   // ==========================================
   // 接下來是排序，先按照注音遞減排序一遍、再按照權重遞減排序一遍。
-  let arrStructSorted: [Entry] = arrStructCalculated.sorted(by: { lhs, rhs -> Bool in
-    (lhs.valPhone, rhs.valCount) < (rhs.valPhone, lhs.valCount)
+  let arrStructSorted: [Unigram] = arrStructCalculated.sorted(by: { lhs, rhs -> Bool in
+    (lhs.key, rhs.count) < (rhs.key, lhs.count)
   })
   NSLog(" - \(i18n): 排序整理完畢，準備編譯要寫入的檔案內容。")
   return arrStructSorted
@@ -434,9 +433,11 @@ func fileOutput(isCHS: Bool) {
   // 讀取標點內容
   do {
     strPunctuation = try String(contentsOfFile: urlPunctuation, encoding: .utf8).replacingOccurrences(
-      of: "\t", with: " ")
+      of: "\t", with: " "
+    )
     strPrintLine += try String(contentsOfFile: urlPunctuation, encoding: .utf8).replacingOccurrences(
-      of: "\t", with: " ")
+      of: "\t", with: " "
+    )
   } catch {
     NSLog(" - \(i18n): Exception happened when reading raw punctuation data.")
   }
@@ -453,18 +454,33 @@ func fileOutput(isCHS: Bool) {
       }
     }
   }
-  var arrStructUnified: [Entry] = []
+  var arrStructUnified: [Unigram] = []
   arrStructUnified += rawDictForKanjis(isCHS: isCHS)
   arrStructUnified += rawDictForNonKanjis(isCHS: isCHS)
   arrStructUnified += rawDictForPhrases(isCHS: isCHS)
   // 計算權重且排序
   arrStructUnified = weightAndSort(arrStructUnified, isCHS: isCHS)
-  for entry in arrStructUnified {
-    let theKey = entry.valPhone
-    let theValue = (String(entry.valWeight) + " " + entry.valPhrase)
+
+  // 資料重複性檢查
+  NSLog(" - \(i18n): 執行資料重複性檢查，會在之後再給出對應的檢查結果。")
+  var setAlreadyInserted = Set<String>()
+  var arrFoundedDuplications = [String]()
+
+  // 健康狀況檢查
+  NSLog(" - \(i18n): 執行資料健康狀況檢查。")
+  print(healthCheck(arrStructUnified))
+  for unigram in arrStructUnified {
+    if setAlreadyInserted.contains(unigram.value + "\t" + unigram.key) {
+      arrFoundedDuplications.append(unigram.value + "\t" + unigram.key)
+    } else {
+      setAlreadyInserted.insert(unigram.value + "\t" + unigram.key)
+    }
+
+    let theKey = unigram.key
+    let theValue = (String(unigram.score) + " " + unigram.value)
     rangeMap[cnvPhonabetToASCII(theKey), default: []].append(theValue.data(using: .utf8)!)
     strPrintLine +=
-      entry.valPhone + " " + entry.valPhrase + " " + String(entry.valWeight)
+      unigram.key + " " + unigram.value + " " + String(unigram.score)
       + "\n"
   }
   NSLog(" - \(i18n): 要寫入檔案的 txt 內容編譯完畢。")
@@ -476,6 +492,12 @@ func fileOutput(isCHS: Bool) {
     NSLog(" - \(i18n): Error on writing strings to file: \(error)")
   }
   NSLog(" - \(i18n): 寫入完成。")
+  if !arrFoundedDuplications.isEmpty {
+    NSLog(" - \(i18n): 尋得下述重複項目，請務必手動排查：")
+    print("-------------------")
+    print(arrFoundedDuplications.joined(separator: "\n"))
+  }
+  print("===================")
 }
 
 func commonFileOutput() {
@@ -555,3 +577,153 @@ func main() {
 }
 
 main()
+
+// MARK: - Lexicon health check
+
+/// Builds a plain-text health report for the given unigrams.
+///
+/// Only unigrams scoring above -14 are examined. Each key is split on "-" into
+/// readings; for every unigram with two or more readings, the best-scoring
+/// single-reading unigram of each reading is looked up and their scores are summed.
+/// The report then lists:
+/// - how many examined unigrams have one reading and how many have several;
+/// - unigrams whose own score does not exceed that sum ("overridden"), counted and
+///   sampled (first 25) per reading count from 2 through 10;
+/// - overridden unigrams that also score below an existing entry whose value equals
+///   the joined values of those single-reading unigrams;
+/// - unigrams using a reading for which no single-reading unigram exists.
+///
+/// - Parameter data: The unigrams to inspect.
+/// - Returns: The report text; every line ends with "\n" and one extra "\n" closes it.
+func healthCheck(_ data: [Unigram]) -> String {
+  typealias Overridden = (key: String, value: String, score: Double, rivals: [Unigram], delta: Double)
+  typealias Contested = (value: String, score: Double, rival: String, rivalScore: Double)
+
+  // Core lexicon scores are usually above -10, but compounds containing kana
+  // have to be covered as well, hence the lower cut-off.
+  let validUnigrams = data.filter { $0.score > -14 }
+
+  var bestSingleByReading = [String: Unigram]()
+  var bestScoreByValue = [String: Double]()
+  var unigramMonoCharCounter = 0
+  var unigramPolyCharCounter = 0
+
+  for neta in validUnigrams {
+    bestScoreByValue[neta.value] = max(neta.score, bestScoreByValue[neta.value] ?? -14)
+    let readings = neta.key.split(separator: "-")
+    if readings.count > 1 { unigramPolyCharCounter += 1 }
+    guard readings.count == 1, let onlyReading = readings.first else { continue }
+    unigramMonoCharCounter += 1
+    let reading = String(onlyReading)
+    // Keep the entry already stored unless this one scores strictly higher.
+    if let known = bestSingleByReading[reading], known.score >= neta.score { continue }
+    bestSingleByReading[reading] = neta
+  }
+
+  var faulty = [Unigram]()
+  var indifferents = [Overridden]()
+  var insufficients = [Overridden]()
+  var competingUnigrams = [Contested]()
+
+  for neta in validUnigrams {
+    let readings = neta.key.split(separator: "-")
+    guard readings.count >= 2 else { continue }
+    var rivals = [Unigram]()
+    for reading in readings {
+      guard let single = bestSingleByReading[String(reading)] else { break }
+      rivals.append(single)
+    }
+    // A reading without a single-reading unigram stopped the lookup early.
+    guard rivals.count == readings.count else {
+      faulty.append(neta)
+      continue
+    }
+    let rivalScore = rivals.reduce(0.0) { $0 + $1.score }
+    guard rivalScore >= neta.score else { continue }
+    let instance: Overridden = (
+      key: neta.key, value: neta.value, score: neta.score, rivals: rivals, delta: neta.score - rivalScore
+    )
+    let valueJoined = rivals.map(\.value).joined()
+    if neta.value == valueJoined {
+      // The phrase is exactly what its parts would spell anyway; nothing to fix.
+      indifferents.append(instance)
+      continue
+    }
+    if let valueJoinedScore = bestScoreByValue[valueJoined], neta.score < valueJoinedScore {
+      competingUnigrams.append(
+        (value: neta.value, score: neta.score, rival: valueJoined, rivalScore: valueJoinedScore)
+      )
+    }
+    insufficients.append(instance)
+  }
+
+  insufficients.sort { $0.score > $1.score }
+  competingUnigrams.sort { ($0.score - $0.rivalScore) > ($1.score - $1.rivalScore) }
+
+  // Share of overridden entries among multi-reading unigrams, as a percentage rounded to
+  // two decimals. Done in floating point: the Int expression `a / b * 100` truncates to 0
+  // whenever a < b and traps when b == 0.
+  let insufficientPercentage: Double =
+    unigramPolyCharCounter == 0
+    ? 0
+    : (Double(insufficients.count) / Double(unigramPolyCharCounter) * 10000).rounded() / 100
+
+  let separator = String(repeating: "-", count: 72)
+  var result = ""
+
+  func printl(_ input: String) {
+    result += input + "\n"
+  }
+
+  printl(separator)
+  printl("持單個字符的有效單元圖數量：\(unigramMonoCharCounter)")
+  printl("持多個字符的有效單元圖數量：\(unigramPolyCharCounter)")
+
+  printl(separator)
+  printl("總結一下那些容易被單個漢字的字頻干擾輸入的詞組單元圖：")
+  printl("因干擾組件和字詞本身完全重疊、而不需要處理的單元圖的數量：\(indifferents.count)")
+  printl(
+    "有 \(insufficients.count) 個複字單元圖被自身成分讀音對應的其它單字單元圖奪權，約佔全部有效單元圖的 \(insufficientPercentage)%，"
+  )
+  printl("\n其中有：")
+
+  // Reading counts covered by the report, paired with the numeral used in its headings.
+  let lengthLabels: [(Int, String)] = [
+    (2, "雙"), (3, "三"), (4, "四"), (5, "五"), (6, "六"), (7, "七"), (8, "八"), (9, "九"), (10, "十"),
+  ]
+  let insufficientsByLength = Dictionary(grouping: insufficients, by: { $0.key.split(separator: "-").count })
+
+  for (length, label) in lengthLabels {
+    printl("  \(insufficientsByLength[length]?.count ?? 0) 個有效\(label)字單元圖")
+  }
+
+  for (length, label) in lengthLabels {
+    guard let group = insufficientsByLength[length], !group.isEmpty else { continue }
+    printl(separator)
+    printl("前二十五個被奪權的有效\(label)字單元圖")
+    for content in group.prefix(25) {
+      let rivals = content.rivals.map(\.description).joined(separator: ",")
+      printl("{\(content.key),\(content.value),\(content.score),[\(rivals)],\(content.delta)}")
+    }
+  }
+
+  if !competingUnigrams.isEmpty {
+    printl(separator)
+    printl("也發現有 \(competingUnigrams.count) 個複字單元圖被某些由高頻單字組成的複字單元圖奪權的情況，")
+    printl("例如（前二十五例）：")
+    for content in competingUnigrams.prefix(25) {
+      printl("{\(content.value),\(content.score),\(content.rival),\(content.rivalScore)}")
+    }
+  }
+
+  if !faulty.isEmpty {
+    printl(separator)
+    printl("下述單元圖用到了漢字核心表當中尚未收錄的讀音，可能無法正常輸入：")
+    for content in faulty {
+      printl(content.description)
+    }
+  }
+
+  result += "\n"
+  return result
+}