Megrez // v1.2.8 update + UOM punctuation conditioning fix.
This commit is contained in:
parent
3db04310a2
commit
4ff9051c17
|
@ -56,7 +56,7 @@ class KeyHandler {
|
||||||
var compositor: Megrez.Compositor // 組字器
|
var compositor: Megrez.Compositor // 組字器
|
||||||
var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組
|
var currentLM: vChewing.LMInstantiator = .init() // 當前主語言模組
|
||||||
var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組
|
var currentUOM: vChewing.LMUserOverride = .init() // 當前半衰記憶模組
|
||||||
var walkedAnchors: [Megrez.NodeAnchor] = [] // 用以記錄爬過的節錨的陣列
|
var walkedAnchors: [Megrez.NodeAnchor] { compositor.walkedAnchors } // 用以記錄爬過的節錨的陣列
|
||||||
/// 委任物件 (ctlInputMethod),以便呼叫其中的函式。
|
/// 委任物件 (ctlInputMethod),以便呼叫其中的函式。
|
||||||
var delegate: KeyHandlerDelegate?
|
var delegate: KeyHandlerDelegate?
|
||||||
|
|
||||||
|
@ -95,7 +95,6 @@ class KeyHandler {
|
||||||
func clear() {
|
func clear() {
|
||||||
composer.clear()
|
composer.clear()
|
||||||
compositor.clear()
|
compositor.clear()
|
||||||
walkedAnchors.removeAll()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Functions dealing with Megrez.
|
// MARK: - Functions dealing with Megrez.
|
||||||
|
@ -103,7 +102,7 @@ class KeyHandler {
|
||||||
/// 實際上要拿給 Megrez 使用的滑鼠游標位址,以方便在組字器最開頭或者最末尾的時候始終能抓取候選字節點陣列。
|
/// 實際上要拿給 Megrez 使用的滑鼠游標位址,以方便在組字器最開頭或者最末尾的時候始終能抓取候選字節點陣列。
|
||||||
///
|
///
|
||||||
/// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。
|
/// 威注音對游標前置與游標後置模式採取的候選字節點陣列抓取方法是分離的,且不使用 Node Crossing。
|
||||||
var actualCandidateCursorIndex: Int {
|
var actualCandidateCursor: Int {
|
||||||
mgrPrefs.useRearCursorMode ? min(compositorCursorIndex, compositorLength - 1) : max(compositorCursorIndex, 1)
|
mgrPrefs.useRearCursorMode ? min(compositorCursorIndex, compositorLength - 1) : max(compositorCursorIndex, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -113,11 +112,11 @@ class KeyHandler {
|
||||||
///
|
///
|
||||||
/// 該函式的爬取順序是從頭到尾。
|
/// 該函式的爬取順序是從頭到尾。
|
||||||
func walk() {
|
func walk() {
|
||||||
walkedAnchors = compositor.walk()
|
compositor.walk()
|
||||||
|
|
||||||
// 在偵錯模式開啟時,將 GraphViz 資料寫入至指定位置。
|
// 在偵錯模式開啟時,將 GraphViz 資料寫入至指定位置。
|
||||||
if mgrPrefs.isDebugModeEnabled {
|
if mgrPrefs.isDebugModeEnabled {
|
||||||
let result = compositor.grid.dumpDOT
|
let result = compositor.dumpDOT
|
||||||
do {
|
do {
|
||||||
try result.write(
|
try result.write(
|
||||||
toFile: "/private/var/tmp/vChewing-visualization.dot",
|
toFile: "/private/var/tmp/vChewing-visualization.dot",
|
||||||
|
@ -137,12 +136,10 @@ class KeyHandler {
|
||||||
/// 估算對象範圍。用比較形象且生動卻有點噁心的解釋的話,蒼蠅一邊吃一邊屙。
|
/// 估算對象範圍。用比較形象且生動卻有點噁心的解釋的話,蒼蠅一邊吃一邊屙。
|
||||||
var commitOverflownCompositionAndWalk: String {
|
var commitOverflownCompositionAndWalk: String {
|
||||||
var textToCommit = ""
|
var textToCommit = ""
|
||||||
if compositor.grid.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty {
|
if compositor.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty {
|
||||||
let anchor: Megrez.NodeAnchor = walkedAnchors[0]
|
let anchor: Megrez.NodeAnchor = walkedAnchors[0]
|
||||||
if let theNode = anchor.node {
|
textToCommit = anchor.node.currentPair.value
|
||||||
textToCommit = theNode.currentKeyValue.value
|
compositor.removeHeadReadings(count: anchor.spanLength)
|
||||||
}
|
|
||||||
compositor.removeHeadReadings(count: anchor.spanningLength)
|
|
||||||
}
|
}
|
||||||
walk()
|
walk()
|
||||||
return textToCommit
|
return textToCommit
|
||||||
|
@ -166,26 +163,22 @@ class KeyHandler {
|
||||||
/// - value: 給定之候選字字串。
|
/// - value: 給定之候選字字串。
|
||||||
/// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
/// - respectCursorPushing: 若該選項為 true,則會在選字之後始終將游標推送至選字後的節錨的前方。
|
||||||
func fixNode(value: String, respectCursorPushing: Bool = true) {
|
func fixNode(value: String, respectCursorPushing: Bool = true) {
|
||||||
let adjustedIndex = max(0, min(actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength))
|
let adjustedCursor = max(0, min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength))
|
||||||
// 開始讓半衰模組觀察目前的狀況。
|
// 開始讓半衰模組觀察目前的狀況。
|
||||||
let selectedNode: Megrez.NodeAnchor = compositor.grid.fixNodeSelectedCandidate(
|
let selectedNode: Megrez.NodeAnchor = compositor.fixNodeSelectedCandidate(value, at: adjustedCursor)
|
||||||
location: adjustedIndex, value: value
|
|
||||||
)
|
|
||||||
// 不要針對逐字選字模式啟用臨時半衰記憶模型。
|
// 不要針對逐字選字模式啟用臨時半衰記憶模型。
|
||||||
if !mgrPrefs.useSCPCTypingMode {
|
if !mgrPrefs.useSCPCTypingMode {
|
||||||
var addToUserOverrideModel = true
|
var addToUserOverrideModel = true
|
||||||
// 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。
|
// 所有讀音數與字符數不匹配的情況均不得塞入半衰記憶模組。
|
||||||
if selectedNode.spanningLength != value.count {
|
if selectedNode.spanLength != value.count {
|
||||||
IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.")
|
IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.")
|
||||||
addToUserOverrideModel = false
|
addToUserOverrideModel = false
|
||||||
}
|
}
|
||||||
if addToUserOverrideModel {
|
if addToUserOverrideModel {
|
||||||
if let theNode = selectedNode.node {
|
// 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。
|
||||||
// 威注音的 SymbolLM 的 Score 是 -12,符合該條件的內容不得塞入半衰記憶模組。
|
if selectedNode.node.scoreFor(candidate: value) <= -12 {
|
||||||
if theNode.scoreFor(candidate: value) <= -12 {
|
IME.prtDebugIntel("UOM: Score <= -12, dismissing.")
|
||||||
IME.prtDebugIntel("UOM: Score <= -12, dismissing.")
|
addToUserOverrideModel = false
|
||||||
addToUserOverrideModel = false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if addToUserOverrideModel {
|
if addToUserOverrideModel {
|
||||||
|
@ -193,7 +186,7 @@ class KeyHandler {
|
||||||
// 令半衰記憶模組觀測給定的三元圖。
|
// 令半衰記憶模組觀測給定的三元圖。
|
||||||
// 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。
|
// 這個過程會讓半衰引擎根據當前上下文生成三元圖索引鍵。
|
||||||
currentUOM.observe(
|
currentUOM.observe(
|
||||||
walkedAnchors: walkedAnchors, cursorIndex: adjustedIndex, candidate: value,
|
walkedAnchors: walkedAnchors, cursorIndex: adjustedCursor, candidate: value,
|
||||||
timestamp: NSDate().timeIntervalSince1970
|
timestamp: NSDate().timeIntervalSince1970
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -206,8 +199,8 @@ class KeyHandler {
|
||||||
if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing {
|
if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing {
|
||||||
var nextPosition = 0
|
var nextPosition = 0
|
||||||
for theAnchor in walkedAnchors {
|
for theAnchor in walkedAnchors {
|
||||||
if nextPosition >= adjustedIndex { break }
|
if nextPosition >= adjustedCursor { break }
|
||||||
nextPosition += theAnchor.spanningLength
|
nextPosition += theAnchor.spanLength
|
||||||
}
|
}
|
||||||
if nextPosition <= compositorLength {
|
if nextPosition <= compositorLength {
|
||||||
compositorCursorIndex = nextPosition
|
compositorCursorIndex = nextPosition
|
||||||
|
@ -217,20 +210,17 @@ class KeyHandler {
|
||||||
|
|
||||||
/// 組字器內超出最大動態爬軌範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。
|
/// 組字器內超出最大動態爬軌範圍的節錨都會被自動標記為「已經手動選字過」,減少爬軌運算負擔。
|
||||||
func markNodesFixedIfNecessary() {
|
func markNodesFixedIfNecessary() {
|
||||||
let width = compositor.grid.width
|
let width = compositor.width
|
||||||
if width <= kMaxComposingBufferNeedsToWalkSize {
|
if width <= kMaxComposingBufferNeedsToWalkSize {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
var index = 0
|
var index = 0
|
||||||
for anchor in walkedAnchors {
|
for anchor in walkedAnchors {
|
||||||
guard let node = anchor.node else { break }
|
|
||||||
if index >= width - kMaxComposingBufferNeedsToWalkSize { break }
|
if index >= width - kMaxComposingBufferNeedsToWalkSize { break }
|
||||||
if node.score < node.kSelectedCandidateScore {
|
if anchor.node.score < Megrez.Node.kSelectedCandidateScore {
|
||||||
compositor.grid.fixNodeSelectedCandidate(
|
compositor.fixNodeSelectedCandidate(anchor.node.currentPair.value, at: index + anchor.spanLength)
|
||||||
location: index + anchor.spanningLength, value: node.currentKeyValue.value
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
index += anchor.spanningLength
|
index += anchor.spanLength
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -248,14 +238,11 @@ class KeyHandler {
|
||||||
arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength }
|
arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength }
|
||||||
|
|
||||||
// 將節錨內的候選字詞資料拓印到輸出陣列內。
|
// 將節錨內的候選字詞資料拓印到輸出陣列內。
|
||||||
for currentNodeAnchor in arrAnchors {
|
for currentCandidate in arrAnchors.map(\.node.candidates).joined() {
|
||||||
guard let currentNode = currentNodeAnchor.node else { continue }
|
// 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。
|
||||||
for currentCandidate in currentNode.candidates {
|
// 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。
|
||||||
// 選字窗的內容的康熙轉換 / JIS 轉換不能放在這裡處理,會影響選字有效性。
|
// 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。
|
||||||
// 選字的原理是拿著具體的候選字詞的字串去當前的節錨下找出對應的候選字詞(X元圖)。
|
arrCandidates.append(currentCandidate.value)
|
||||||
// 一旦在這裡轉換了,節錨內的某些元圖就無法被選中。
|
|
||||||
arrCandidates.append(currentCandidate.value)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。
|
// 決定是否根據半衰記憶模組的建議來調整候選字詞的順序。
|
||||||
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder {
|
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder {
|
||||||
|
@ -291,8 +278,8 @@ class KeyHandler {
|
||||||
if !overrideValue.isEmpty {
|
if !overrideValue.isEmpty {
|
||||||
IME.prtDebugIntel(
|
IME.prtDebugIntel(
|
||||||
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
|
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
|
||||||
compositor.grid.overrideNodeScoreForSelectedCandidate(
|
compositor.overrideNodeScoreForSelectedCandidate(
|
||||||
location: min(actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength),
|
location: min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength),
|
||||||
value: overrideValue,
|
value: overrideValue,
|
||||||
overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon)
|
overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon)
|
||||||
)
|
)
|
||||||
|
@ -307,7 +294,7 @@ class KeyHandler {
|
||||||
/// - epsilon: 半衰模組的衰減指數。
|
/// - epsilon: 半衰模組的衰減指數。
|
||||||
/// - Returns: 尋獲的最高權重數值。
|
/// - Returns: 尋獲的最高權重數值。
|
||||||
func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double {
|
func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double {
|
||||||
return nodeAnchors.compactMap(\.node?.highestUnigramScore).max() ?? 0 + epsilon
|
return nodeAnchors.map(\.node.highestUnigramScore).max() ?? 0 + epsilon
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Extracted methods and functions (Tekkon).
|
// MARK: - Extracted methods and functions (Tekkon).
|
||||||
|
@ -363,8 +350,8 @@ class KeyHandler {
|
||||||
/// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。
|
/// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。
|
||||||
/// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。
|
/// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的。
|
||||||
mgrPrefs.useRearCursorMode
|
mgrPrefs.useRearCursorMode
|
||||||
? compositor.grid.nodesBeginningAt(location: actualCandidateCursorIndex)
|
? compositor.nodesBeginningAt(location: actualCandidateCursor)
|
||||||
: compositor.grid.nodesEndingAt(location: actualCandidateCursorIndex)
|
: compositor.nodesEndingAt(location: actualCandidateCursor)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 將輸入法偏好設定同步至語言模組內。
|
/// 將輸入法偏好設定同步至語言模組內。
|
||||||
|
@ -390,7 +377,7 @@ class KeyHandler {
|
||||||
|
|
||||||
/// 在組字器的給定游標位置內插入讀音。
|
/// 在組字器的給定游標位置內插入讀音。
|
||||||
func insertToCompositorAtCursor(reading: String) {
|
func insertToCompositorAtCursor(reading: String) {
|
||||||
compositor.insertReadingAtCursor(reading: reading)
|
compositor.insertReading(reading)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 組字器的游標位置。
|
/// 組字器的游標位置。
|
||||||
|
@ -408,28 +395,27 @@ class KeyHandler {
|
||||||
///
|
///
|
||||||
/// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear)。
|
/// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear)。
|
||||||
func deleteCompositorReadingAtTheRearOfCursor() {
|
func deleteCompositorReadingAtTheRearOfCursor() {
|
||||||
compositor.deleteReadingAtTheRearOfCursor()
|
compositor.dropReading(direction: .rear)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 在組字器內,朝著往文字輸入方向、砍掉一個與游標相鄰的讀音。
|
/// 在組字器內,朝著往文字輸入方向、砍掉一個與游標相鄰的讀音。
|
||||||
///
|
///
|
||||||
/// 在威注音的術語體系當中,「文字輸入方向」為向前(Front)。
|
/// 在威注音的術語體系當中,「文字輸入方向」為向前(Front)。
|
||||||
func deleteCompositorReadingToTheFrontOfCursor() {
|
func deleteCompositorReadingToTheFrontOfCursor() {
|
||||||
compositor.deleteReadingToTheFrontOfCursor()
|
compositor.dropReading(direction: .front)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 獲取指定游標位置的鍵值長度。
|
/// 獲取指定游標位置的鍵值長度。
|
||||||
/// - Returns: 指定游標位置的鍵值長度。
|
/// - Returns: 指定游標位置的鍵值長度。
|
||||||
var keyLengthAtCurrentIndex: Int {
|
var keyLengthAtCurrentIndex: Int {
|
||||||
guard let node = walkedAnchors[compositorCursorIndex].node else { return 0 }
|
walkedAnchors[compositorCursorIndex].node.key.split(separator: "-").count
|
||||||
return node.key.split(separator: "-").count
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var nextPhrasePosition: Int {
|
var nextPhrasePosition: Int {
|
||||||
var nextPosition = 0
|
var nextPosition = 0
|
||||||
for theAnchor in walkedAnchors {
|
for theAnchor in walkedAnchors {
|
||||||
if nextPosition > actualCandidateCursorIndex { break }
|
if nextPosition > actualCandidateCursor { break }
|
||||||
nextPosition += theAnchor.spanningLength
|
nextPosition += theAnchor.spanLength
|
||||||
}
|
}
|
||||||
return min(nextPosition, compositorLength)
|
return min(nextPosition, compositorLength)
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,15 +45,15 @@ extension KeyHandler {
|
||||||
/// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。
|
/// 所以在這裡必須做糾偏處理。因為在用 Swift,所以可以用「.utf16」取代「NSString.length()」。
|
||||||
/// 這樣就可以免除不必要的類型轉換。
|
/// 這樣就可以免除不必要的類型轉換。
|
||||||
for theAnchor in walkedAnchors {
|
for theAnchor in walkedAnchors {
|
||||||
guard let theNode = theAnchor.node else { continue }
|
let theNode = theAnchor.node
|
||||||
let strNodeValue = theNode.currentKeyValue.value
|
let strNodeValue = theNode.currentPair.value
|
||||||
composingBuffer += strNodeValue
|
composingBuffer += strNodeValue
|
||||||
let arrSplit: [String] = Array(strNodeValue).map { String($0) }
|
let arrSplit: [String] = Array(strNodeValue).map { String($0) }
|
||||||
let codepointCount = arrSplit.count
|
let codepointCount = arrSplit.count
|
||||||
/// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。
|
/// 藉下述步驟重新將「可見游標位置」對齊至「組字器內的游標所在的讀音位置」。
|
||||||
/// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來
|
/// 每個節錨(NodeAnchor)都有自身的幅位長度(spanningLength),可以用來
|
||||||
/// 累加、以此為依據,來校正「可見游標位置」。
|
/// 累加、以此為依據,來校正「可見游標位置」。
|
||||||
let spanningLength: Int = theAnchor.spanningLength
|
let spanningLength: Int = theAnchor.spanLength
|
||||||
if readingCursorIndex + spanningLength <= compositorCursorIndex {
|
if readingCursorIndex + spanningLength <= compositorCursorIndex {
|
||||||
composedStringCursorIndex += strNodeValue.utf16.count
|
composedStringCursorIndex += strNodeValue.utf16.count
|
||||||
readingCursorIndex += spanningLength
|
readingCursorIndex += spanningLength
|
||||||
|
@ -406,22 +406,20 @@ extension KeyHandler {
|
||||||
|
|
||||||
var composed = ""
|
var composed = ""
|
||||||
|
|
||||||
for theAnchor in walkedAnchors {
|
for node in walkedAnchors.map(\.node) {
|
||||||
if let node = theAnchor.node {
|
var key = node.key
|
||||||
var key = node.key
|
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
||||||
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記
|
||||||
key = Tekkon.restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記
|
key = Tekkon.cnvPhonaToHanyuPinyin(target: key) // 注音轉拼音
|
||||||
key = Tekkon.cnvPhonaToHanyuPinyin(target: key) // 注音轉拼音
|
key = Tekkon.cnvHanyuPinyinToTextbookStyle(target: key) // 轉教科書式標調
|
||||||
key = Tekkon.cnvHanyuPinyinToTextbookStyle(target: key) // 轉教科書式標調
|
key = key.replacingOccurrences(of: "-", with: " ")
|
||||||
key = key.replacingOccurrences(of: "-", with: " ")
|
} else {
|
||||||
} else {
|
key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
|
||||||
key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
|
|
||||||
}
|
|
||||||
|
|
||||||
let value = node.currentKeyValue.value
|
|
||||||
// 不要給標點符號等特殊元素加注音
|
|
||||||
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let value = node.currentPair.value
|
||||||
|
// 不要給標點符號等特殊元素加注音
|
||||||
|
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
|
||||||
}
|
}
|
||||||
|
|
||||||
clear()
|
clear()
|
||||||
|
@ -796,26 +794,21 @@ extension KeyHandler {
|
||||||
var length = 0
|
var length = 0
|
||||||
var currentAnchor = Megrez.NodeAnchor()
|
var currentAnchor = Megrez.NodeAnchor()
|
||||||
let cursorIndex = min(
|
let cursorIndex = min(
|
||||||
actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength
|
actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength
|
||||||
)
|
)
|
||||||
for anchor in walkedAnchors {
|
for anchor in walkedAnchors {
|
||||||
length += anchor.spanningLength
|
length += anchor.spanLength
|
||||||
if length >= cursorIndex {
|
if length >= cursorIndex {
|
||||||
currentAnchor = anchor
|
currentAnchor = anchor
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
guard let currentNode = currentAnchor.node else {
|
let currentNode = currentAnchor.node
|
||||||
IME.prtDebugIntel("4F2DEC2F")
|
let currentValue = currentNode.currentPair.value
|
||||||
errorCallback()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
let currentValue = currentNode.currentKeyValue.value
|
|
||||||
|
|
||||||
var currentIndex = 0
|
var currentIndex = 0
|
||||||
if currentNode.score < currentNode.kSelectedCandidateScore {
|
if currentNode.score < Megrez.Node.kSelectedCandidateScore {
|
||||||
/// 只要是沒有被使用者手動選字過的(節錨下的)節點,
|
/// 只要是沒有被使用者手動選字過的(節錨下的)節點,
|
||||||
/// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配
|
/// 就從第一個候選字詞開始,這樣使用者在敲字時就會優先匹配
|
||||||
/// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個
|
/// 那些字詞長度不小於 2 的單元圖。換言之,如果使用者敲了兩個
|
||||||
|
|
|
@ -28,7 +28,7 @@ import Foundation
|
||||||
|
|
||||||
extension vChewing {
|
extension vChewing {
|
||||||
/// 語言模組副本化模組(LMInstantiator,下稱「LMI」)自身為符合天權星組字引擎內
|
/// 語言模組副本化模組(LMInstantiator,下稱「LMI」)自身為符合天權星組字引擎內
|
||||||
/// 的 LanguageModelProtocol 協定的模組、統籌且整理來自其它子模組的資料(包括使
|
/// 的 LangModelProtocol 協定的模組、統籌且整理來自其它子模組的資料(包括使
|
||||||
/// 用者語彙、繪文字模組、語彙濾除表、原廠語言模組等)。
|
/// 用者語彙、繪文字模組、語彙濾除表、原廠語言模組等)。
|
||||||
///
|
///
|
||||||
/// LMI 型別為與輸入法按鍵調度模組直接溝通之唯一語言模組。當組字器開始根據給定的
|
/// LMI 型別為與輸入法按鍵調度模組直接溝通之唯一語言模組。當組字器開始根據給定的
|
||||||
|
@ -44,7 +44,7 @@ extension vChewing {
|
||||||
///
|
///
|
||||||
/// LMI 會根據需要分別載入原廠語言模組和其他個別的子語言模組。LMI 本身不會記錄這些
|
/// LMI 會根據需要分別載入原廠語言模組和其他個別的子語言模組。LMI 本身不會記錄這些
|
||||||
/// 語言模組的相關資料的存放位置,僅藉由參數來讀取相關訊息。
|
/// 語言模組的相關資料的存放位置,僅藉由參數來讀取相關訊息。
|
||||||
public class LMInstantiator: LanguageModelProtocol {
|
public class LMInstantiator: LangModelProtocol {
|
||||||
// 在函式內部用以記錄狀態的開關。
|
// 在函式內部用以記錄狀態的開關。
|
||||||
public var isPhraseReplacementEnabled = false
|
public var isPhraseReplacementEnabled = false
|
||||||
public var isCNSEnabled = false
|
public var isCNSEnabled = false
|
||||||
|
@ -256,7 +256,7 @@ extension vChewing {
|
||||||
lmAssociates.hasValuesFor(key: key)
|
lmAssociates.hasValuesFor(key: key)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 該函式不起作用,僅用來滿足 LanguageModelProtocol 協定的要求。
|
/// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。
|
||||||
public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
|
public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
|
||||||
|
|
||||||
// MARK: - 核心函式(對內)
|
// MARK: - 核心函式(對內)
|
||||||
|
|
|
@ -130,13 +130,12 @@ extension vChewing {
|
||||||
func convertKeyFrom(
|
func convertKeyFrom(
|
||||||
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
|
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
|
||||||
) -> String {
|
) -> String {
|
||||||
let arrEndingPunctuation = [",", "。", "!", "?", "」", "』", "”", "’"]
|
|
||||||
let whiteList = "你他妳她祢衪它牠再在"
|
let whiteList = "你他妳她祢衪它牠再在"
|
||||||
var arrNodes: [Megrez.NodeAnchor] = []
|
var arrNodes: [Megrez.NodeAnchor] = []
|
||||||
var intLength = 0
|
var intLength = 0
|
||||||
for theNodeAnchor in walkedAnchors {
|
for theNodeAnchor in walkedAnchors {
|
||||||
arrNodes.append(theNodeAnchor)
|
arrNodes.append(theNodeAnchor)
|
||||||
intLength += theNodeAnchor.spanningLength
|
intLength += theNodeAnchor.spanLength
|
||||||
if intLength >= cursorIndex {
|
if intLength >= cursorIndex {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -146,9 +145,8 @@ extension vChewing {
|
||||||
|
|
||||||
arrNodes = Array(arrNodes.reversed())
|
arrNodes = Array(arrNodes.reversed())
|
||||||
|
|
||||||
guard let kvCurrent = arrNodes[0].node?.currentKeyValue,
|
let kvCurrent = arrNodes[0].node.currentPair
|
||||||
!arrEndingPunctuation.contains(kvCurrent.value)
|
guard !kvCurrent.key.contains("_") else {
|
||||||
else {
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,20 +171,18 @@ extension vChewing {
|
||||||
}
|
}
|
||||||
|
|
||||||
if arrNodes.count >= 2,
|
if arrNodes.count >= 2,
|
||||||
let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue,
|
!kvPrevious.key.contains("_"),
|
||||||
!arrEndingPunctuation.contains(kvPrevious.value),
|
|
||||||
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
|
||||||
{
|
{
|
||||||
kvPrevious = kvPreviousThisOne
|
kvPrevious = arrNodes[1].node.currentPair
|
||||||
readingStack = kvPrevious.key + readingStack
|
readingStack = kvPrevious.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
if arrNodes.count >= 3,
|
if arrNodes.count >= 3,
|
||||||
let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue,
|
!kvAnterior.key.contains("_"),
|
||||||
!arrEndingPunctuation.contains(kvAnterior.value),
|
|
||||||
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
|
||||||
{
|
{
|
||||||
kvAnterior = kvAnteriorThisOne
|
kvAnterior = arrNodes[2].node.currentPair
|
||||||
readingStack = kvAnterior.key + readingStack
|
readingStack = kvAnterior.key + readingStack
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,89 +25,106 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 組字器。
|
/// 組字器。
|
||||||
public class Compositor {
|
public class Compositor: Grid {
|
||||||
|
/// 文字輸入方向
|
||||||
|
public enum TypingDirection { case front, rear }
|
||||||
/// 給被丟掉的節點路徑施加的負權重。
|
/// 給被丟掉的節點路徑施加的負權重。
|
||||||
private let kDroppedPathScore: Double = -999
|
private let kDroppedPathScore: Double = -999
|
||||||
/// 該組字器的游標位置。
|
/// 該組字器的游標位置。
|
||||||
private var mutCursorIndex: Int = 0
|
public var cursor: Int = 0 { didSet { cursor = max(0, min(cursor, readings.count)) } }
|
||||||
/// 該組字器的讀音陣列。
|
/// 該組字器的讀音陣列。
|
||||||
private var mutReadings: [String] = []
|
private(set) var readings: [String] = []
|
||||||
/// 該組字器的軌格。
|
|
||||||
private var mutGrid: Grid = .init()
|
|
||||||
/// 該組字器所使用的語言模型。
|
/// 該組字器所使用的語言模型。
|
||||||
private var mutLM: LanguageModelProtocol
|
private var langModel: LangModelProtocol
|
||||||
|
/// 允許查詢當前游標位置屬於第幾個幅位座標(從 0 開始算)。
|
||||||
|
private(set) var cursorRegionMap: [Int: Int] = .init()
|
||||||
|
private(set) var walkedAnchors: [Megrez.NodeAnchor] = [] // 用以記錄爬過的節錨的陣列
|
||||||
|
|
||||||
/// 公開:該組字器內可以允許的最大詞長。
|
|
||||||
public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength }
|
|
||||||
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
||||||
public var joinSeparator: String = ""
|
public var joinSeparator: String = "-"
|
||||||
/// 公開:該組字器的游標位置。
|
|
||||||
public var cursorIndex: Int {
|
|
||||||
get { mutCursorIndex }
|
|
||||||
set { mutCursorIndex = (newValue < 0) ? 0 : min(newValue, mutReadings.count) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 公開:該組字器是否為空。
|
|
||||||
public var isEmpty: Bool { mutGrid.isEmpty }
|
|
||||||
|
|
||||||
/// 公開:該組字器的軌格(唯讀)。
|
|
||||||
public var grid: Grid { mutGrid }
|
|
||||||
/// 公開:該組字器的長度,也就是內建漢字讀音的數量(唯讀)。
|
/// 公開:該組字器的長度,也就是內建漢字讀音的數量(唯讀)。
|
||||||
public var length: Int { mutReadings.count }
|
public var length: Int { readings.count }
|
||||||
/// 公開:該組字器的讀音陣列(唯讀)。
|
|
||||||
public var readings: [String] { mutReadings }
|
/// 按幅位來前後移動游標。
|
||||||
|
/// - Parameter direction: 移動方向
|
||||||
|
/// - Returns: 該操作是否順利完成。
|
||||||
|
@discardableResult public func jumpCursorBySpan(to direction: TypingDirection) -> Bool {
|
||||||
|
switch direction {
|
||||||
|
case .front:
|
||||||
|
if cursor == width { return false }
|
||||||
|
case .rear:
|
||||||
|
if cursor == 0 { return false }
|
||||||
|
}
|
||||||
|
guard let currentRegion = cursorRegionMap[cursor] else { return false }
|
||||||
|
|
||||||
|
let aRegionForward = max(currentRegion - 1, 0)
|
||||||
|
let currentRegionBorderRear: Int = walkedAnchors[0..<currentRegion].map(\.spanLength).reduce(0, +)
|
||||||
|
switch cursor {
|
||||||
|
case currentRegionBorderRear:
|
||||||
|
switch direction {
|
||||||
|
case .front:
|
||||||
|
cursor =
|
||||||
|
(currentRegion > walkedAnchors.count)
|
||||||
|
? readings.count : walkedAnchors[0...currentRegion].map(\.spanLength).reduce(0, +)
|
||||||
|
case .rear:
|
||||||
|
cursor = walkedAnchors[0..<aRegionForward].map(\.spanLength).reduce(0, +)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
switch direction {
|
||||||
|
case .front:
|
||||||
|
cursor = currentRegionBorderRear + walkedAnchors[currentRegion].spanLength
|
||||||
|
case .rear:
|
||||||
|
cursor = currentRegionBorderRear
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
/// 組字器。
|
/// 組字器。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - lm: 語言模型。可以是任何基於 Megrez.LanguageModel 的衍生型別。
|
/// - lm: 語言模型。可以是任何基於 Megrez.LangModel 的衍生型別。
|
||||||
/// - length: 指定該組字器內可以允許的最大詞長,預設為 10 字。
|
/// - length: 指定該組字器內可以允許的最大詞長,預設為 10 字。
|
||||||
/// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
/// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
||||||
public init(lm: LanguageModelProtocol, length: Int = 10, separator: String = "") {
|
public init(lm: LangModelProtocol, length: Int = 10, separator: String = "-") {
|
||||||
mutLM = lm
|
langModel = lm
|
||||||
mutGrid = .init(spanLength: abs(length)) // 防呆
|
super.init(spanLength: abs(length)) // 防呆
|
||||||
joinSeparator = separator
|
joinSeparator = separator
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 組字器自我清空專用函式。
|
/// 組字器自我清空專用函式。
|
||||||
public func clear() {
|
override public func clear() {
|
||||||
mutCursorIndex = 0
|
super.clear()
|
||||||
mutReadings.removeAll()
|
cursor = 0
|
||||||
mutGrid.clear()
|
readings.removeAll()
|
||||||
|
walkedAnchors.removeAll()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 在游標位置插入給定的讀音。
|
/// 在游標位置插入給定的讀音。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - reading: 要插入的讀音。
|
/// - reading: 要插入的讀音。
|
||||||
public func insertReadingAtCursor(reading: String) {
|
@discardableResult public func insertReading(_ reading: String) -> Bool {
|
||||||
mutReadings.insert(reading, at: mutCursorIndex)
|
guard !reading.isEmpty, langModel.hasUnigramsFor(key: reading) else { return false }
|
||||||
mutGrid.expandGridByOneAt(location: mutCursorIndex)
|
readings.insert(reading, at: cursor)
|
||||||
build()
|
resizeGridByOneAt(location: cursor, to: .expand)
|
||||||
mutCursorIndex += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 朝著與文字輸入方向相反的方向、砍掉一個與游標相鄰的讀音。
|
|
||||||
/// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear)。
|
|
||||||
@discardableResult public func deleteReadingAtTheRearOfCursor() -> Bool {
|
|
||||||
if mutCursorIndex == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
mutReadings.remove(at: mutCursorIndex - 1)
|
|
||||||
mutCursorIndex -= 1
|
|
||||||
mutGrid.shrinkGridByOneAt(location: mutCursorIndex)
|
|
||||||
build()
|
build()
|
||||||
|
cursor += 1
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 朝著往文字輸入方向、砍掉一個與游標相鄰的讀音。
|
/// 朝著指定方向砍掉一個與游標相鄰的讀音。
|
||||||
/// 在威注音的術語體系當中,「文字輸入方向」為向前(Front)。
|
///
|
||||||
@discardableResult public func deleteReadingToTheFrontOfCursor() -> Bool {
|
/// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear),反之則為向前(Front)。
|
||||||
if mutCursorIndex == mutReadings.count {
|
/// - Parameter direction: 指定方向。
|
||||||
|
/// - Returns: 該操作是否順利完成。
|
||||||
|
@discardableResult public func dropReading(direction: TypingDirection) -> Bool {
|
||||||
|
let isBackSpace = direction == .rear
|
||||||
|
if cursor == (isBackSpace ? 0 : readings.count) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
readings.remove(at: cursor - (isBackSpace ? 1 : 0))
|
||||||
mutReadings.remove(at: mutCursorIndex)
|
cursor -= (isBackSpace ? 1 : 0)
|
||||||
mutGrid.shrinkGridByOneAt(location: mutCursorIndex)
|
resizeGridByOneAt(location: cursor, to: .shrink)
|
||||||
build()
|
build()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -118,98 +135,84 @@ extension Megrez {
|
||||||
/// 將該位置要溢出的敲字內容遞交之後、再執行這個函式。
|
/// 將該位置要溢出的敲字內容遞交之後、再執行這個函式。
|
||||||
@discardableResult public func removeHeadReadings(count: Int) -> Bool {
|
@discardableResult public func removeHeadReadings(count: Int) -> Bool {
|
||||||
let count = abs(count) // 防呆
|
let count = abs(count) // 防呆
|
||||||
if count > length {
|
if count > length { return false }
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
for _ in 0..<count {
|
for _ in 0..<count {
|
||||||
if mutCursorIndex > 0 {
|
cursor = max(cursor - 1, 0)
|
||||||
mutCursorIndex -= 1
|
if !readings.isEmpty {
|
||||||
}
|
readings.removeFirst()
|
||||||
if !mutReadings.isEmpty {
|
resizeGridByOneAt(location: 0, to: .shrink)
|
||||||
mutReadings.removeFirst()
|
|
||||||
mutGrid.shrinkGridByOneAt(location: 0)
|
|
||||||
}
|
}
|
||||||
build()
|
build()
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Walker
|
|
||||||
|
|
||||||
/// 對已給定的軌格按照給定的位置與條件進行正向爬軌。
|
/// 對已給定的軌格按照給定的位置與條件進行正向爬軌。
|
||||||
/// - Parameters:
|
/// - Returns: 一個包含有效結果的節錨陣列。
|
||||||
/// - location: 開始爬軌的位置。
|
@discardableResult public func walk() -> [NodeAnchor] {
|
||||||
/// - accumulatedScore: 給定累計權重,非必填參數。預設值為 0。
|
let newLocation = width
|
||||||
/// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。
|
// 這裡把所有空節點都過濾掉。
|
||||||
/// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。
|
walkedAnchors = Array(
|
||||||
public func walk(
|
reverseWalk(at: newLocation).reversed()
|
||||||
at location: Int = 0,
|
).lazy.filter { !$0.isEmpty }
|
||||||
score accumulatedScore: Double = 0.0,
|
updateCursorJumpingTables(walkedAnchors)
|
||||||
joinedPhrase: String = "",
|
return walkedAnchors
|
||||||
longPhrases: [String] = .init()
|
|
||||||
) -> [NodeAnchor] {
|
|
||||||
let newLocation = (mutGrid.width) - abs(location) // 防呆
|
|
||||||
return Array(
|
|
||||||
reverseWalk(
|
|
||||||
at: newLocation, score: accumulatedScore,
|
|
||||||
joinedPhrase: joinedPhrase, longPhrases: longPhrases
|
|
||||||
).reversed())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 對已給定的軌格按照給定的位置與條件進行反向爬軌。
|
// MARK: - Private functions
|
||||||
|
|
||||||
|
/// 內部專用反芻函式,對已給定的軌格按照給定的位置與條件進行反向爬軌。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - location: 開始爬軌的位置。
|
/// - location: 開始爬軌的位置。
|
||||||
/// - accumulatedScore: 給定累計權重,非必填參數。預設值為 0。
|
/// - mass: 給定累計權重,非必填參數。預設值為 0。
|
||||||
/// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。
|
/// - joinedPhrase: 用以統計累計長詞的內部參數,請勿主動使用。
|
||||||
/// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。
|
/// - longPhrases: 用以統計累計長詞的內部參數,請勿主動使用。
|
||||||
public func reverseWalk(
|
/// - Returns: 一個包含結果的節錨陣列。
|
||||||
|
private func reverseWalk(
|
||||||
at location: Int,
|
at location: Int,
|
||||||
score accumulatedScore: Double = 0.0,
|
mass: Double = 0.0,
|
||||||
joinedPhrase: String = "",
|
joinedPhrase: String = "",
|
||||||
longPhrases: [String] = .init()
|
longPhrases: [String] = .init()
|
||||||
) -> [NodeAnchor] {
|
) -> [NodeAnchor] {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
if location == 0 || location > mutGrid.width {
|
if location == 0 || location > width {
|
||||||
return .init()
|
return .init()
|
||||||
}
|
}
|
||||||
|
|
||||||
var paths = [[NodeAnchor]]()
|
var paths = [[NodeAnchor]]()
|
||||||
var nodes = mutGrid.nodesEndingAt(location: location)
|
let nodes = nodesEndingAt(location: location).stableSorted {
|
||||||
|
|
||||||
nodes = nodes.stableSorted {
|
|
||||||
$0.scoreForSort > $1.scoreForSort
|
$0.scoreForSort > $1.scoreForSort
|
||||||
}
|
}
|
||||||
|
|
||||||
if let nodeZero = nodes[0].node, nodeZero.score >= nodeZero.kSelectedCandidateScore {
|
guard !nodes.isEmpty else { return .init() } // 防止下文出現範圍外索引的錯誤
|
||||||
|
|
||||||
|
if nodes[0].node.score >= Node.kSelectedCandidateScore {
|
||||||
// 在使用者有選過候選字詞的情況下,摒棄非依此據而成的節點路徑。
|
// 在使用者有選過候選字詞的情況下,摒棄非依此據而成的節點路徑。
|
||||||
var anchorZero = nodes[0]
|
var theAnchor = nodes[0]
|
||||||
anchorZero.accumulatedScore = accumulatedScore + nodeZero.score
|
theAnchor.mass = mass + nodes[0].node.score
|
||||||
var path: [NodeAnchor] = reverseWalk(
|
var path: [NodeAnchor] = reverseWalk(
|
||||||
at: location - anchorZero.spanningLength, score: anchorZero.accumulatedScore
|
at: location - theAnchor.spanLength, mass: theAnchor.mass
|
||||||
)
|
)
|
||||||
path.insert(anchorZero, at: 0)
|
path.insert(theAnchor, at: 0)
|
||||||
paths.append(path)
|
paths.append(path)
|
||||||
} else if !longPhrases.isEmpty {
|
} else if !longPhrases.isEmpty {
|
||||||
var path = [NodeAnchor]()
|
var path = [NodeAnchor]()
|
||||||
for theAnchor in nodes {
|
for theAnchor in nodes {
|
||||||
guard let theNode = theAnchor.node else { continue }
|
|
||||||
var theAnchor = theAnchor
|
var theAnchor = theAnchor
|
||||||
let joinedValue = theNode.currentKeyValue.value + joinedPhrase
|
let joinedValue = theAnchor.node.currentPair.value + joinedPhrase
|
||||||
// 如果只是一堆單漢字的節點組成了同樣的長詞的話,直接棄用這個節點路徑。
|
// 如果只是一堆單漢字的節點組成了同樣的長詞的話,直接棄用這個節點路徑。
|
||||||
// 打比方說「八/月/中/秋/山/林/涼」與「八月/中秋/山林/涼」在使用者來看
|
// 打比方說「八/月/中/秋/山/林/涼」與「八月/中秋/山林/涼」在使用者來看
|
||||||
// 是「結果等價」的,那就扔掉前者。
|
// 是「結果等價」的,那就扔掉前者。
|
||||||
if longPhrases.contains(joinedValue) {
|
if longPhrases.contains(joinedValue) {
|
||||||
theAnchor.accumulatedScore = kDroppedPathScore
|
theAnchor.mass = kDroppedPathScore
|
||||||
path.insert(theAnchor, at: 0)
|
path.insert(theAnchor, at: 0)
|
||||||
paths.append(path)
|
paths.append(path)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
theAnchor.accumulatedScore = accumulatedScore + theNode.score
|
theAnchor.mass = mass + theAnchor.node.score
|
||||||
path = reverseWalk(
|
path = reverseWalk(
|
||||||
at: location - theAnchor.spanningLength,
|
at: location - theAnchor.spanLength,
|
||||||
score: theAnchor.accumulatedScore,
|
mass: theAnchor.mass,
|
||||||
joinedPhrase: (joinedValue.count >= longPhrases[0].count) ? "" : joinedValue,
|
joinedPhrase: (joinedValue.count >= longPhrases[0].count) ? "" : joinedValue,
|
||||||
longPhrases: .init()
|
longPhrases: .init()
|
||||||
)
|
)
|
||||||
|
@ -219,9 +222,8 @@ extension Megrez {
|
||||||
} else {
|
} else {
|
||||||
// 看看當前格位有沒有更長的候選字詞。
|
// 看看當前格位有沒有更長的候選字詞。
|
||||||
var longPhrases = [String]()
|
var longPhrases = [String]()
|
||||||
for theAnchor in nodes.lazy.filter({ $0.spanningLength > 1 }) {
|
for theAnchor in nodes.lazy.filter({ $0.spanLength > 1 }) {
|
||||||
guard let theNode = theAnchor.node else { continue }
|
longPhrases.append(theAnchor.node.currentPair.value)
|
||||||
longPhrases.append(theNode.currentKeyValue.value)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
longPhrases = longPhrases.stableSorted {
|
longPhrases = longPhrases.stableSorted {
|
||||||
|
@ -229,12 +231,11 @@ extension Megrez {
|
||||||
}
|
}
|
||||||
for theAnchor in nodes {
|
for theAnchor in nodes {
|
||||||
var theAnchor = theAnchor
|
var theAnchor = theAnchor
|
||||||
guard let theNode = theAnchor.node else { continue }
|
theAnchor.mass = mass + theAnchor.node.score
|
||||||
theAnchor.accumulatedScore = accumulatedScore + theNode.score
|
|
||||||
var path = [NodeAnchor]()
|
var path = [NodeAnchor]()
|
||||||
path = reverseWalk(
|
path = reverseWalk(
|
||||||
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore,
|
at: location - theAnchor.spanLength, mass: theAnchor.mass,
|
||||||
joinedPhrase: (theAnchor.spanningLength > 1) ? "" : theNode.currentKeyValue.value,
|
joinedPhrase: (theAnchor.spanLength > 1) ? "" : theAnchor.node.currentPair.value,
|
||||||
longPhrases: .init()
|
longPhrases: .init()
|
||||||
)
|
)
|
||||||
path.insert(theAnchor, at: 0)
|
path.insert(theAnchor, at: 0)
|
||||||
|
@ -248,31 +249,29 @@ extension Megrez {
|
||||||
|
|
||||||
var result: [NodeAnchor] = paths[0]
|
var result: [NodeAnchor] = paths[0]
|
||||||
for neta in paths.lazy.filter({
|
for neta in paths.lazy.filter({
|
||||||
$0.last!.accumulatedScore > result.last!.accumulatedScore
|
$0.last!.mass > result.last!.mass
|
||||||
}) {
|
}) {
|
||||||
result = neta
|
result = neta
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result // 空節點過濾的步驟交給 walk() 這個對外函式,以避免重複執行清理步驟。
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Private functions
|
|
||||||
|
|
||||||
private func build() {
|
private func build() {
|
||||||
let itrBegin: Int =
|
let itrBegin: Int =
|
||||||
(mutCursorIndex < maxBuildSpanLength) ? 0 : mutCursorIndex - maxBuildSpanLength
|
(cursor < maxBuildSpanLength) ? 0 : cursor - maxBuildSpanLength
|
||||||
let itrEnd: Int = min(mutCursorIndex + maxBuildSpanLength, mutReadings.count)
|
let itrEnd: Int = min(cursor + maxBuildSpanLength, readings.count)
|
||||||
|
|
||||||
for p in itrBegin..<itrEnd {
|
for p in itrBegin..<itrEnd {
|
||||||
for q in 1..<maxBuildSpanLength {
|
for q in 1..<maxBuildSpanLength {
|
||||||
if p + q > itrEnd { break }
|
if p + q > itrEnd { break }
|
||||||
let arrSlice = mutReadings[p..<(p + q)]
|
let arrSlice = readings[p..<(p + q)]
|
||||||
let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
|
let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
|
||||||
if mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { continue }
|
if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue }
|
||||||
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
|
let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading)
|
||||||
if unigrams.isEmpty { continue }
|
if unigrams.isEmpty { continue }
|
||||||
let n = Node(key: combinedReading, unigrams: unigrams)
|
let n = Node(key: combinedReading, unigrams: unigrams)
|
||||||
mutGrid.insertNode(node: n, location: p, spanningLength: q)
|
insertNode(node: n, location: p, spanLength: q)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -280,6 +279,20 @@ extension Megrez {
|
||||||
private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
|
private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
|
||||||
arrSlice.joined(separator: separator)
|
arrSlice.joined(separator: separator)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) {
|
||||||
|
var cursorRegionMapDict = [Int: Int]()
|
||||||
|
var counter = 0
|
||||||
|
for (i, anchor) in anchors.enumerated() {
|
||||||
|
for _ in 0..<anchor.spanLength {
|
||||||
|
cursorRegionMapDict[counter] = i
|
||||||
|
counter += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cursorRegionMapDict[counter] = anchors.count
|
||||||
|
cursorRegionMapDict[-1] = 0 // 防呆
|
||||||
|
cursorRegionMap = cursorRegionMapDict
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,93 +24,82 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 軌格。
|
/// 軌格,會被組字器作為原始型別來繼承。
|
||||||
public class Grid {
|
public class Grid {
|
||||||
|
/// 軌格增減行為。
|
||||||
|
public enum ResizeBehavior { case expand, shrink }
|
||||||
/// 幅位陣列。
|
/// 幅位陣列。
|
||||||
private var mutSpans: [Megrez.Span]
|
private(set) var spans: [Megrez.SpanUnit]
|
||||||
|
|
||||||
/// 該幅位內可以允許的最大詞長。
|
/// 該軌格內可以允許的最大幅位長度。
|
||||||
private var mutMaxBuildSpanLength = 10
|
private(set) var maxBuildSpanLength = 10
|
||||||
|
|
||||||
/// 公開:該軌格內可以允許的最大幅位長度。
|
|
||||||
public var maxBuildSpanLength: Int { mutMaxBuildSpanLength }
|
|
||||||
|
|
||||||
/// 公開:軌格的寬度,也就是其內的幅位陣列當中的幅位數量。
|
/// 公開:軌格的寬度,也就是其內的幅位陣列當中的幅位數量。
|
||||||
public var width: Int { mutSpans.count }
|
public var width: Int { spans.count }
|
||||||
|
|
||||||
/// 公開:軌格是否為空。
|
/// 公開:軌格是否為空。
|
||||||
public var isEmpty: Bool { mutSpans.isEmpty }
|
public var isEmpty: Bool { spans.isEmpty }
|
||||||
|
|
||||||
/// 初期化轨格。
|
/// 初期化轨格。
|
||||||
public init(spanLength: Int = 10) {
|
public init(spanLength: Int = 10) {
|
||||||
mutMaxBuildSpanLength = spanLength
|
maxBuildSpanLength = spanLength
|
||||||
mutSpans = [Megrez.Span]()
|
spans = [Megrez.SpanUnit]()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 自我清空該軌格的內容。
|
/// 自我清空該軌格的內容。
|
||||||
public func clear() {
|
public func clear() {
|
||||||
mutSpans.removeAll()
|
spans.removeAll()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 往該軌格的指定位置插入指定幅位長度的指定節點。
|
/// 往該軌格的指定位置插入指定幅位長度的指定節點。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - node: 節點。
|
/// - node: 節點。
|
||||||
/// - location: 位置。
|
/// - location: 位置。
|
||||||
/// - spanningLength: 給定的幅位長度。
|
/// - spanLength: 給定的幅位長度。
|
||||||
public func insertNode(node: Node, location: Int, spanningLength: Int) {
|
public func insertNode(node: Node, location: Int, spanLength: Int) {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
let spanningLength = abs(spanningLength) // 防呆
|
let spanLength = abs(spanLength) // 防呆
|
||||||
if location >= mutSpans.count {
|
if location >= spans.count {
|
||||||
let diff = location - mutSpans.count + 1
|
let diff = location - spans.count + 1
|
||||||
for _ in 0..<diff {
|
for _ in 0..<diff {
|
||||||
mutSpans.append(Span())
|
spans.append(SpanUnit())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutSpans[location].insert(node: node, length: spanningLength)
|
spans[location].insert(node: node, length: spanLength)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 給定索引鍵、位置、幅位長度,在該軌格內確認是否有對應的節點存在。
|
/// 給定索引鍵、位置、幅位長度,在該軌格內確認是否有對應的節點存在。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - location: 位置。
|
/// - location: 位置。
|
||||||
/// - spanningLength: 給定的幅位長度。
|
/// - spanLength: 給定的幅位長度。
|
||||||
/// - key: 索引鍵。
|
/// - key: 索引鍵。
|
||||||
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
|
public func hasMatchedNode(location: Int, spanLength: Int, key: String) -> Bool {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
let spanningLength = abs(spanningLength) // 防呆
|
let spanLength = abs(spanLength) // 防呆
|
||||||
if location > mutSpans.count {
|
if location > spans.count {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
let n = mutSpans[location].node(length: spanningLength)
|
let n = spans[location].nodeOf(length: spanLength)
|
||||||
return n != nil && key == n?.key
|
return n != nil && key == n?.key
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 在該軌格的指定位置擴增一個幅位。
|
/// 在該軌格的指定位置擴增或減少一個幅位。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - location: 位置。
|
/// - location: 位置。
|
||||||
public func expandGridByOneAt(location: Int) {
|
public func resizeGridByOneAt(location: Int, to behavior: ResizeBehavior) {
|
||||||
let location = abs(location) // 防呆
|
let location = max(0, min(width, location)) // 防呆
|
||||||
mutSpans.insert(Span(), at: location)
|
switch behavior {
|
||||||
if location == 0 || location == mutSpans.count { return }
|
case .expand:
|
||||||
|
spans.insert(SpanUnit(), at: location)
|
||||||
|
if [spans.count, 0].contains(location) { return }
|
||||||
|
case .shrink:
|
||||||
|
if location >= spans.count { return }
|
||||||
|
spans.remove(at: location)
|
||||||
|
}
|
||||||
for i in 0..<location {
|
for i in 0..<location {
|
||||||
// zaps overlapping spans
|
// zaps overlapping spans
|
||||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
spans[i].dropNodesBeyond(length: location - i)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 在該軌格的指定位置減少一個幅位。
|
|
||||||
/// - Parameters:
|
|
||||||
/// - location: 位置。
|
|
||||||
public func shrinkGridByOneAt(location: Int) {
|
|
||||||
let location = abs(location) // 防呆
|
|
||||||
if location >= mutSpans.count {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
mutSpans.remove(at: location)
|
|
||||||
for i in 0..<location {
|
|
||||||
// zaps overlapping spans
|
|
||||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,21 +109,21 @@ extension Megrez {
|
||||||
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
|
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
var results = [NodeAnchor]()
|
var results = [NodeAnchor]()
|
||||||
if location >= mutSpans.count { return results }
|
if location >= spans.count { return results }
|
||||||
// 此時 mutSpans 必然不為空,因為 location 不可能小於 0。
|
// 此時 spans 必然不為空,因為 location 不可能小於 0。
|
||||||
let span = mutSpans[location]
|
let span = spans[location]
|
||||||
for i in 1...maxBuildSpanLength {
|
for i in 1...maxBuildSpanLength {
|
||||||
if let np = span.node(length: i) {
|
if let np = span.nodeOf(length: i) {
|
||||||
results.append(
|
results.append(
|
||||||
.init(
|
.init(
|
||||||
node: np,
|
node: np,
|
||||||
location: location,
|
location: location,
|
||||||
spanningLength: i
|
spanLength: i
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results // 已證實不會有空節點產生。
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 給定位置,枚舉出所有在這個位置結尾的節點。
|
/// 給定位置,枚舉出所有在這個位置結尾的節點。
|
||||||
|
@ -143,21 +132,21 @@ extension Megrez {
|
||||||
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
var results = [NodeAnchor]()
|
var results = [NodeAnchor]()
|
||||||
if mutSpans.isEmpty || location > mutSpans.count { return results }
|
if spans.isEmpty || location > spans.count { return results }
|
||||||
for i in 0..<location {
|
for i in 0..<location {
|
||||||
let span = mutSpans[i]
|
let span = spans[i]
|
||||||
if i + span.maximumLength < location { continue }
|
if i + span.maxLength < location { continue }
|
||||||
if let np = span.node(length: location - i) {
|
if let np = span.nodeOf(length: location - i) {
|
||||||
results.append(
|
results.append(
|
||||||
.init(
|
.init(
|
||||||
node: np,
|
node: np,
|
||||||
location: i,
|
location: i,
|
||||||
spanningLength: location - i
|
spanLength: location - i
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results // 已證實不會有空節點產生。
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 給定位置,枚舉出所有在這個位置結尾、或者橫跨該位置的節點。
|
/// 給定位置,枚舉出所有在這個位置結尾、或者橫跨該位置的節點。
|
||||||
|
@ -166,46 +155,76 @@ extension Megrez {
|
||||||
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
var results = [NodeAnchor]()
|
var results = [NodeAnchor]()
|
||||||
if mutSpans.isEmpty || location > mutSpans.count { return results }
|
if spans.isEmpty || location > spans.count { return results }
|
||||||
for i in 0..<location {
|
for i in 0..<location {
|
||||||
let span = mutSpans[i]
|
let span = spans[i]
|
||||||
if i + span.maximumLength < location { continue }
|
if i + span.maxLength < location { continue }
|
||||||
for j in 1...span.maximumLength {
|
for j in 1...span.maxLength {
|
||||||
if i + j < location { continue }
|
if i + j < location { continue }
|
||||||
if let np = span.node(length: j) {
|
if let np = span.nodeOf(length: j) {
|
||||||
results.append(
|
results.append(
|
||||||
.init(
|
.init(
|
||||||
node: np,
|
node: np,
|
||||||
location: i,
|
location: i,
|
||||||
spanningLength: location - i
|
spanLength: location - i
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results // 已證實不會有空節點產生。
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。
|
/// 給定位置,枚舉出所有在這個位置結尾或開頭或者橫跨該位置的節點。
|
||||||
|
///
|
||||||
|
/// ⚠︎ 注意:排序可能失真。
|
||||||
|
/// - Parameters:
|
||||||
|
/// - location: 位置。
|
||||||
|
public func nodesOverlappedAt(location: Int) -> [NodeAnchor] {
|
||||||
|
Array(Set(nodesBeginningAt(location: location) + nodesCrossingOrEndingAt(location: location)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 使用給定的候選字字串,將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。
|
||||||
|
///
|
||||||
|
/// 該函式可以僅用作過程函式,但準確度不如用於處理候選字鍵值配對的 fixNodeWithCandidate()。
|
||||||
|
/// - Parameters:
|
||||||
|
/// - location: 位置。
|
||||||
|
/// - value: 給定字串。
|
||||||
|
@discardableResult public func fixNodeWithCandidateLiteral(_ value: String, at location: Int) -> NodeAnchor {
|
||||||
|
let location = abs(location) // 防呆
|
||||||
|
var node = NodeAnchor()
|
||||||
|
for theAnchor in nodesOverlappedAt(location: location) {
|
||||||
|
let candidates = theAnchor.node.candidates
|
||||||
|
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||||
|
theAnchor.node.resetCandidate()
|
||||||
|
for (i, candidate) in candidates.enumerated() {
|
||||||
|
if candidate.value == value {
|
||||||
|
theAnchor.node.selectCandidateAt(index: i)
|
||||||
|
node = theAnchor
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 使用給定的候選字鍵值配對,將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。
|
||||||
///
|
///
|
||||||
/// 該函式可以僅用作過程函式。
|
/// 該函式可以僅用作過程函式。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - location: 位置。
|
/// - location: 位置。
|
||||||
/// - value: 給定字串。
|
/// - value: 給定候選字鍵值配對。
|
||||||
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
|
@discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
var node = NodeAnchor()
|
var node = NodeAnchor()
|
||||||
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
|
for theAnchor in nodesOverlappedAt(location: location) {
|
||||||
guard let theNode = nodeAnchor.node else {
|
let candidates = theAnchor.node.candidates
|
||||||
continue
|
|
||||||
}
|
|
||||||
let candidates = theNode.candidates
|
|
||||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||||
theNode.resetCandidate()
|
theAnchor.node.resetCandidate()
|
||||||
for (i, candidate) in candidates.enumerated() {
|
for (i, candidate) in candidates.enumerated() {
|
||||||
if candidate.value == value {
|
if candidate == pair {
|
||||||
theNode.selectCandidateAt(index: i)
|
theAnchor.node.selectCandidateAt(index: i)
|
||||||
node = nodeAnchor
|
node = theAnchor
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -220,16 +239,13 @@ extension Megrez {
|
||||||
/// - overridingScore: 給定權重數值。
|
/// - overridingScore: 給定權重數值。
|
||||||
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
|
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
|
||||||
let location = abs(location) // 防呆
|
let location = abs(location) // 防呆
|
||||||
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
|
for theAnchor in nodesOverlappedAt(location: location) {
|
||||||
guard let theNode = nodeAnchor.node else {
|
let candidates = theAnchor.node.candidates
|
||||||
continue
|
|
||||||
}
|
|
||||||
let candidates = theNode.candidates
|
|
||||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||||
theNode.resetCandidate()
|
theAnchor.node.resetCandidate()
|
||||||
for (i, candidate) in candidates.enumerated() {
|
for (i, candidate) in candidates.enumerated() {
|
||||||
if candidate.value == value {
|
if candidate.value == value {
|
||||||
theNode.selectFloatingCandidateAt(index: i, score: overridingScore)
|
theAnchor.node.selectFloatingCandidateAt(index: i, score: overridingScore)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -244,29 +260,22 @@ extension Megrez.Grid {
|
||||||
/// 生成用以交給 GraphViz 診斷的資料檔案內容,純文字。
|
/// 生成用以交給 GraphViz 診斷的資料檔案內容,純文字。
|
||||||
public var dumpDOT: String {
|
public var dumpDOT: String {
|
||||||
var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
|
var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
|
||||||
for (p, span) in mutSpans.enumerated() {
|
for (p, span) in spans.enumerated() {
|
||||||
for ni in 0...(span.maximumLength) {
|
for ni in 0...(span.maxLength) {
|
||||||
guard let np: Megrez.Node = span.node(length: ni) else {
|
guard let np = span.nodeOf(length: ni) else { continue }
|
||||||
continue
|
|
||||||
}
|
|
||||||
if p == 0 {
|
if p == 0 {
|
||||||
strOutput += "BOS -> \(np.currentKeyValue.value);\n"
|
strOutput += "BOS -> \(np.currentPair.value);\n"
|
||||||
}
|
}
|
||||||
|
strOutput += "\(np.currentPair.value);\n"
|
||||||
strOutput += "\(np.currentKeyValue.value);\n"
|
if (p + ni) < spans.count {
|
||||||
|
let destinationSpan = spans[p + ni]
|
||||||
if (p + ni) < mutSpans.count {
|
for q in 0...(destinationSpan.maxLength) {
|
||||||
let destinationSpan = mutSpans[p + ni]
|
guard let dn = destinationSpan.nodeOf(length: q) else { continue }
|
||||||
for q in 0...(destinationSpan.maximumLength) {
|
strOutput += np.currentPair.value + " -> " + dn.currentPair.value + ";\n"
|
||||||
if let dn = destinationSpan.node(length: q) {
|
|
||||||
strOutput += np.currentKeyValue.value + " -> " + dn.currentKeyValue.value + ";\n"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
guard (p + ni) == spans.count else { continue }
|
||||||
if (p + ni) == mutSpans.count {
|
strOutput += np.currentPair.value + " -> EOS;\n"
|
||||||
strOutput += np.currentKeyValue.value + " -> EOS;\n"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
strOutput += "EOS;\n}\n"
|
strOutput += "EOS;\n}\n"
|
||||||
|
|
|
@ -25,25 +25,34 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 節锚。
|
/// 節锚。
|
||||||
@frozen public struct NodeAnchor: CustomStringConvertible {
|
@frozen public struct NodeAnchor: Hashable {
|
||||||
|
/// 用來判斷該節錨是否為空。
|
||||||
|
public var isEmpty: Bool { node.key.isEmpty }
|
||||||
/// 節點。一個節锚內不一定有節點。
|
/// 節點。一個節锚內不一定有節點。
|
||||||
public var node: Node?
|
public var node: Node = .init()
|
||||||
/// 節锚所在的位置。
|
/// 節锚所在的位置。
|
||||||
public var location: Int = 0
|
public var location: Int = 0
|
||||||
/// 幅位長度。
|
/// 指定的幅位長度。
|
||||||
public var spanningLength: Int = 0
|
public var spanLength: Int = 0
|
||||||
/// 累計權重。
|
/// 累計權重。
|
||||||
public var accumulatedScore: Double = 0.0
|
public var mass: Double = 0.0
|
||||||
/// 索引鍵的長度。
|
/// 索引鍵的長度。
|
||||||
public var keyLength: Int {
|
public var keyLength: Int {
|
||||||
node?.key.count ?? 0
|
isEmpty ? node.key.count : 0
|
||||||
|
}
|
||||||
|
|
||||||
|
public func hash(into hasher: inout Hasher) {
|
||||||
|
hasher.combine(node)
|
||||||
|
hasher.combine(location)
|
||||||
|
hasher.combine(spanLength)
|
||||||
|
hasher.combine(mass)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 將當前節锚列印成一個字串。
|
/// 將當前節锚列印成一個字串。
|
||||||
public var description: String {
|
public var description: String {
|
||||||
var stream = ""
|
var stream = ""
|
||||||
stream += "{@(" + String(location) + "," + String(spanningLength) + "),"
|
stream += "{@(" + String(location) + "," + String(spanLength) + "),"
|
||||||
if let node = node {
|
if node.key.isEmpty {
|
||||||
stream += node.description
|
stream += node.description
|
||||||
} else {
|
} else {
|
||||||
stream += "null"
|
stream += "null"
|
||||||
|
@ -54,12 +63,12 @@ extension Megrez {
|
||||||
|
|
||||||
/// 獲取用來比較的權重。
|
/// 獲取用來比較的權重。
|
||||||
public var scoreForSort: Double {
|
public var scoreForSort: Double {
|
||||||
node?.score ?? 0
|
isEmpty ? node.score : 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - DumpDOT-related functions.
|
// MARK: - Array Extensions.
|
||||||
|
|
||||||
extension Array where Element == Megrez.NodeAnchor {
|
extension Array where Element == Megrez.NodeAnchor {
|
||||||
/// 將節锚陣列列印成一個字串。
|
/// 將節锚陣列列印成一個字串。
|
||||||
|
@ -70,4 +79,14 @@ extension Array where Element == Megrez.NodeAnchor {
|
||||||
}
|
}
|
||||||
return arrOutputContent.joined(separator: "<-")
|
return arrOutputContent.joined(separator: "<-")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 從一個節錨陣列當中取出目前的自動選字字串陣列。
|
||||||
|
public var values: [String] {
|
||||||
|
map(\.node.currentPair.value)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 從一個節錨陣列當中取出目前的索引鍵陣列。
|
||||||
|
public var keys: [String] {
|
||||||
|
map(\.node.currentPair.key)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,21 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 幅位。
|
/// 幅位。
|
||||||
@frozen public struct Span {
|
@frozen public struct SpanUnit {
|
||||||
/// 辭典:以節點長度為索引,以節點為資料值。
|
/// 辭典:以節點長度為索引,以節點為資料值。
|
||||||
private var mutLengthNodeMap: [Int: Megrez.Node] = [:]
|
private var lengthNodeMap: [Int: Megrez.Node] = [:]
|
||||||
/// 最大節點長度。
|
/// 最長幅距。
|
||||||
private var mutMaximumLength: Int = 0
|
private(set) var maxLength: Int = 0
|
||||||
|
|
||||||
/// 公開:最長幅距(唯讀)。
|
|
||||||
public var maximumLength: Int {
|
|
||||||
mutMaximumLength
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 自我清空,各項參數歸零。
|
/// 自我清空,各項參數歸零。
|
||||||
mutating func clear() {
|
mutating func clear() {
|
||||||
mutLengthNodeMap.removeAll()
|
lengthNodeMap.removeAll()
|
||||||
mutMaximumLength = 0
|
maxLength = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 往自身插入一個節點、及給定的節點長度。
|
/// 往自身插入一個節點、及給定的節點長度。
|
||||||
|
@ -48,37 +43,37 @@ extension Megrez {
|
||||||
/// - length: 給定的節點長度。
|
/// - length: 給定的節點長度。
|
||||||
mutating func insert(node: Node, length: Int) {
|
mutating func insert(node: Node, length: Int) {
|
||||||
let length = abs(length) // 防呆
|
let length = abs(length) // 防呆
|
||||||
mutLengthNodeMap[length] = node
|
lengthNodeMap[length] = node
|
||||||
mutMaximumLength = max(mutMaximumLength, length)
|
maxLength = max(maxLength, length)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 移除任何比給定的長度更長的節點。
|
/// 移除任何比給定的長度更長的節點。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - length: 給定的節點長度。
|
/// - length: 給定的節點長度。
|
||||||
mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
|
mutating func dropNodesBeyond(length: Int) {
|
||||||
let length = abs(length) // 防呆
|
let length = abs(length) // 防呆
|
||||||
if length > mutMaximumLength { return }
|
if length > maxLength { return }
|
||||||
var lenMax = 0
|
var lenMax = 0
|
||||||
var removalList: [Int: Megrez.Node] = [:]
|
var removalList: [Int: Megrez.Node] = [:]
|
||||||
for key in mutLengthNodeMap.keys {
|
for key in lengthNodeMap.keys {
|
||||||
if key > length {
|
if key > length {
|
||||||
removalList[key] = mutLengthNodeMap[key]
|
removalList[key] = lengthNodeMap[key]
|
||||||
} else {
|
} else {
|
||||||
lenMax = max(lenMax, key)
|
lenMax = max(lenMax, key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for key in removalList.keys {
|
for key in removalList.keys {
|
||||||
mutLengthNodeMap.removeValue(forKey: key)
|
lengthNodeMap.removeValue(forKey: key)
|
||||||
}
|
}
|
||||||
mutMaximumLength = lenMax
|
maxLength = lenMax
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 給定節點長度,獲取節點。
|
/// 給定節點長度,獲取節點。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - length: 給定的節點長度。
|
/// - length: 給定的節點長度。
|
||||||
public func node(length: Int) -> Node? {
|
public func nodeOf(length: Int) -> Node? {
|
||||||
// 防呆 Abs()
|
// 防呆 Abs()
|
||||||
mutLengthNodeMap.keys.contains(abs(length)) ? mutLengthNodeMap[abs(length)] : nil
|
lengthNodeMap.keys.contains(abs(length)) ? lengthNodeMap[abs(length)] : nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,76 +25,86 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 節點。
|
/// 節點。
|
||||||
public class Node {
|
public class Node: Equatable, Hashable {
|
||||||
/// 鍵。
|
public static func == (lhs: Megrez.Node, rhs: Megrez.Node) -> Bool {
|
||||||
private var mutKey: String = ""
|
lhs.key == rhs.key && lhs.score == rhs.score && lhs.unigrams == rhs.unigrams && lhs.bigrams == rhs.bigrams
|
||||||
/// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。
|
&& lhs.candidates == rhs.candidates && lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap
|
||||||
private var mutScore: Double = 0
|
&& lhs.precedingBigramMap == rhs.precedingBigramMap && lhs.isCandidateFixed == rhs.isCandidateFixed
|
||||||
/// 單元圖陣列。
|
&& lhs.selectedUnigramIndex == rhs.selectedUnigramIndex
|
||||||
private var mutUnigrams: [Unigram]
|
|
||||||
/// 雙元圖陣列。
|
|
||||||
private var mutBigrams: [Bigram]
|
|
||||||
/// 候選字詞陣列,以鍵值陣列的形式存在。
|
|
||||||
private var mutCandidates: [KeyValuePaired] = []
|
|
||||||
/// 專門「用單元圖資料值來調查索引值」的辭典。
|
|
||||||
private var mutValueUnigramIndexMap: [String: Int] = [:]
|
|
||||||
/// 專門「用給定鍵值來取對應的雙元圖陣列」的辭典。
|
|
||||||
private var mutPrecedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:]
|
|
||||||
/// 狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態。
|
|
||||||
private var mutCandidateFixed: Bool = false
|
|
||||||
/// 用來登記「當前選中的單元圖」的索引值的變數。
|
|
||||||
private var mutSelectedUnigramIndex: Int = 0
|
|
||||||
/// 用來登記要施加給「『被標記為選中狀態』的候選字詞」的複寫權重的數值。
|
|
||||||
public let kSelectedCandidateScore: Double = 99
|
|
||||||
/// 將當前節點列印成一個字串。
|
|
||||||
public var description: String {
|
|
||||||
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 公開:候選字詞陣列(唯讀),以鍵值陣列的形式存在。
|
public func hash(into hasher: inout Hasher) {
|
||||||
public var candidates: [KeyValuePaired] { mutCandidates }
|
hasher.combine(key)
|
||||||
/// 公開:用來登記「當前選中的單元圖」的索引值的變數(唯讀)。
|
hasher.combine(score)
|
||||||
public var isCandidateFixed: Bool { mutCandidateFixed }
|
hasher.combine(unigrams)
|
||||||
|
hasher.combine(bigrams)
|
||||||
|
hasher.combine(candidates)
|
||||||
|
hasher.combine(valueUnigramIndexMap)
|
||||||
|
hasher.combine(precedingBigramMap)
|
||||||
|
hasher.combine(isCandidateFixed)
|
||||||
|
hasher.combine(selectedUnigramIndex)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 鍵。
|
||||||
|
private(set) var key: String = ""
|
||||||
|
/// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。
|
||||||
|
private(set) var score: Double = 0
|
||||||
|
/// 單元圖陣列。
|
||||||
|
private var unigrams: [Unigram]
|
||||||
|
/// 雙元圖陣列。
|
||||||
|
private var bigrams: [Bigram]
|
||||||
|
/// 候選字詞陣列,以鍵值陣列的形式存在。
|
||||||
|
private(set) var candidates: [KeyValuePaired] = []
|
||||||
|
/// 專門「用單元圖資料值來調查索引值」的辭典。
|
||||||
|
private var valueUnigramIndexMap: [String: Int] = [:]
|
||||||
|
/// 專門「用給定鍵值來取對應的雙元圖陣列」的辭典。
|
||||||
|
private var precedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:]
|
||||||
|
/// 狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態。
|
||||||
|
private(set) var isCandidateFixed: Bool = false
|
||||||
|
/// 用來登記「當前選中的單元圖」的索引值的變數。
|
||||||
|
private var selectedUnigramIndex: Int = 0
|
||||||
|
/// 用來登記要施加給「『被標記為選中狀態』的候選字詞」的複寫權重的數值。
|
||||||
|
public static let kSelectedCandidateScore: Double = 99
|
||||||
|
/// 將當前節點列印成一個字串。
|
||||||
|
public var description: String {
|
||||||
|
"(node,key:\(key),fixed:\(isCandidateFixed ? "true" : "false"),selected:\(selectedUnigramIndex),\(unigrams))"
|
||||||
|
}
|
||||||
|
|
||||||
/// 公開:鍵(唯讀)。
|
|
||||||
public var key: String { mutKey }
|
|
||||||
/// 公開:當前節點的當前被選中的候選字詞「在該節點內的」目前的權重(唯讀)。
|
|
||||||
public var score: Double { mutScore }
|
|
||||||
/// 公開:當前被選中的候選字詞的鍵值配對。
|
/// 公開:當前被選中的候選字詞的鍵值配對。
|
||||||
public var currentKeyValue: KeyValuePaired {
|
public var currentPair: KeyValuePaired {
|
||||||
mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePaired() : mutCandidates[mutSelectedUnigramIndex]
|
selectedUnigramIndex >= unigrams.count ? KeyValuePaired() : candidates[selectedUnigramIndex]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 公開:給出當前單元圖陣列內最高的權重數值。
|
/// 公開:給出當前單元圖陣列內最高的權重數值。
|
||||||
public var highestUnigramScore: Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score }
|
public var highestUnigramScore: Double { unigrams.isEmpty ? 0.0 : unigrams[0].score }
|
||||||
|
|
||||||
/// 初期化一個節點。
|
/// 初期化一個節點。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - key: 索引鍵。
|
/// - key: 索引鍵。
|
||||||
/// - unigrams: 單元圖陣列。
|
/// - unigrams: 單元圖陣列。
|
||||||
/// - bigrams: 雙元圖陣列(非必填)。
|
/// - bigrams: 雙元圖陣列(非必填)。
|
||||||
public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
|
public init(key: String = "", unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
|
||||||
mutKey = key
|
self.key = key
|
||||||
mutUnigrams = unigrams
|
self.unigrams = unigrams
|
||||||
mutBigrams = bigrams
|
self.bigrams = bigrams
|
||||||
|
|
||||||
mutUnigrams.sort {
|
self.unigrams.sort {
|
||||||
$0.score > $1.score
|
$0.score > $1.score
|
||||||
}
|
}
|
||||||
|
|
||||||
if !mutUnigrams.isEmpty {
|
if !self.unigrams.isEmpty {
|
||||||
mutScore = mutUnigrams[0].score
|
score = unigrams[0].score
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i, gram) in mutUnigrams.enumerated() {
|
for (i, gram) in self.unigrams.enumerated() {
|
||||||
mutValueUnigramIndexMap[gram.keyValue.value] = i
|
valueUnigramIndexMap[gram.keyValue.value] = i
|
||||||
mutCandidates.append(gram.keyValue)
|
candidates.append(gram.keyValue)
|
||||||
}
|
}
|
||||||
|
|
||||||
for gram in bigrams.lazy.filter({ [self] in
|
for gram in bigrams.lazy.filter({ [self] in
|
||||||
mutPrecedingBigramMap.keys.contains($0.precedingKeyValue)
|
precedingBigramMap.keys.contains($0.precedingKeyValue)
|
||||||
}) {
|
}) {
|
||||||
mutPrecedingBigramMap[gram.precedingKeyValue]?.append(gram)
|
precedingBigramMap[gram.precedingKeyValue]?.append(gram)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,22 +112,22 @@ extension Megrez {
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - precedingKeyValues: 前述鍵值陣列。
|
/// - precedingKeyValues: 前述鍵值陣列。
|
||||||
public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) {
|
public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) {
|
||||||
var newIndex = mutSelectedUnigramIndex
|
var newIndex = selectedUnigramIndex
|
||||||
var max = mutScore
|
var max = score
|
||||||
|
|
||||||
if !isCandidateFixed {
|
if !isCandidateFixed {
|
||||||
for neta in precedingKeyValues {
|
for neta in precedingKeyValues {
|
||||||
let bigrams = mutPrecedingBigramMap[neta] ?? []
|
let bigrams = precedingBigramMap[neta] ?? []
|
||||||
for bigram in bigrams.lazy.filter({ [self] in
|
for bigram in bigrams.lazy.filter({ [self] in
|
||||||
$0.score > max && mutValueUnigramIndexMap.keys.contains($0.keyValue.value)
|
$0.score > max && valueUnigramIndexMap.keys.contains($0.keyValue.value)
|
||||||
}) {
|
}) {
|
||||||
newIndex = mutValueUnigramIndexMap[bigram.keyValue.value] ?? newIndex
|
newIndex = valueUnigramIndexMap[bigram.keyValue.value] ?? newIndex
|
||||||
max = bigram.score
|
max = bigram.score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutScore = max
|
score = max
|
||||||
mutSelectedUnigramIndex = newIndex
|
selectedUnigramIndex = newIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 選中位於給定索引位置的候選字詞。
|
/// 選中位於給定索引位置的候選字詞。
|
||||||
|
@ -126,17 +136,17 @@ extension Megrez {
|
||||||
/// - fix: 是否將當前節點標記為「候選詞已鎖定」的狀態。
|
/// - fix: 是否將當前節點標記為「候選詞已鎖定」的狀態。
|
||||||
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
|
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
|
||||||
let index = abs(index)
|
let index = abs(index)
|
||||||
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
|
selectedUnigramIndex = index >= unigrams.count ? 0 : index
|
||||||
mutCandidateFixed = fix
|
isCandidateFixed = fix
|
||||||
mutScore = kSelectedCandidateScore
|
score = Megrez.Node.kSelectedCandidateScore
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 重設該節點的候選字詞狀態。
|
/// 重設該節點的候選字詞狀態。
|
||||||
public func resetCandidate() {
|
public func resetCandidate() {
|
||||||
mutSelectedUnigramIndex = 0
|
selectedUnigramIndex = 0
|
||||||
mutCandidateFixed = false
|
isCandidateFixed = false
|
||||||
if !mutUnigrams.isEmpty {
|
if !unigrams.isEmpty {
|
||||||
mutScore = mutUnigrams[0].score
|
score = unigrams[0].score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,16 +156,26 @@ extension Megrez {
|
||||||
/// - score: 給定權重條件。
|
/// - score: 給定權重條件。
|
||||||
public func selectFloatingCandidateAt(index: Int, score: Double) {
|
public func selectFloatingCandidateAt(index: Int, score: Double) {
|
||||||
let index = abs(index) // 防呆
|
let index = abs(index) // 防呆
|
||||||
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
|
selectedUnigramIndex = index >= unigrams.count ? 0 : index
|
||||||
mutCandidateFixed = false
|
isCandidateFixed = false
|
||||||
mutScore = score
|
self.score = score
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 藉由給定的候選字詞字串,找出在庫的單元圖權重數值。沒有的話就找零。
|
/// 藉由給定的候選字詞字串,找出在庫的單元圖權重數值。沒有的話就找零。
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - candidate: 給定的候選字詞字串。
|
/// - candidate: 給定的候選字詞字串。
|
||||||
public func scoreFor(candidate: String) -> Double {
|
public func scoreFor(candidate: String) -> Double {
|
||||||
for unigram in mutUnigrams.lazy.filter({ $0.keyValue.value == candidate }) {
|
for unigram in unigrams.lazy.filter({ $0.keyValue.value == candidate }) {
|
||||||
|
return unigram.score
|
||||||
|
}
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 藉由給定的候選字詞鍵值配對,找出在庫的單元圖權重數值。沒有的話就找零。
|
||||||
|
/// - Parameters:
|
||||||
|
/// - candidate: 給定的候選字詞鍵值配對。
|
||||||
|
public func scoreForPaired(candidate: KeyValuePaired) -> Double {
|
||||||
|
for unigram in unigrams.lazy.filter({ $0.keyValue == candidate }) {
|
||||||
return unigram.score
|
return unigram.score
|
||||||
}
|
}
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
|
@ -23,7 +23,7 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public protocol LanguageModelProtocol {
|
public protocol LangModelProtocol {
|
||||||
/// 給定鍵,讓語言模型找給一組單元圖陣列。
|
/// 給定鍵,讓語言模型找給一組單元圖陣列。
|
||||||
func unigramsFor(key: String) -> [Megrez.Unigram]
|
func unigramsFor(key: String) -> [Megrez.Unigram]
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ public protocol LanguageModelProtocol {
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 語言模型框架,回頭實際使用時需要派生一個型別、且重寫相關函式。
|
/// 語言模型框架,回頭實際使用時需要派生一個型別、且重寫相關函式。
|
||||||
open class LanguageModel: LanguageModelProtocol {
|
open class LangModel: LangModelProtocol {
|
||||||
public init() {}
|
public init() {}
|
||||||
|
|
||||||
// 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函式的參數設計。
|
// 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函式的參數設計。
|
||||||
|
|
|
@ -25,7 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 雙元圖。
|
/// 雙元圖。
|
||||||
@frozen public struct Bigram: Equatable, CustomStringConvertible {
|
@frozen public struct Bigram: Equatable, CustomStringConvertible, Hashable {
|
||||||
/// 當前鍵值。
|
/// 當前鍵值。
|
||||||
public var keyValue: KeyValuePaired
|
public var keyValue: KeyValuePaired
|
||||||
/// 前述鍵值。
|
/// 前述鍵值。
|
||||||
|
@ -61,7 +61,7 @@ extension Megrez {
|
||||||
|
|
||||||
public static func < (lhs: Bigram, rhs: Bigram) -> Bool {
|
public static func < (lhs: Bigram, rhs: Bigram) -> Bool {
|
||||||
lhs.precedingKeyValue < rhs.precedingKeyValue
|
lhs.precedingKeyValue < rhs.precedingKeyValue
|
||||||
|| (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue))
|
|| (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
/// 單元圖。
|
/// 單元圖。
|
||||||
@frozen public struct Unigram: Equatable, CustomStringConvertible {
|
@frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable {
|
||||||
/// 鍵值。
|
/// 鍵值。
|
||||||
public var keyValue: KeyValuePaired
|
public var keyValue: KeyValuePaired
|
||||||
/// 權重。
|
/// 權重。
|
||||||
|
@ -54,7 +54,7 @@ extension Megrez {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
|
public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
|
||||||
lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue)
|
lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,7 +52,7 @@ extension Megrez {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
|
public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
|
||||||
lhs.key.count == rhs.key.count && lhs.value == rhs.value
|
lhs.key == rhs.key && lhs.value == rhs.value
|
||||||
}
|
}
|
||||||
|
|
||||||
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
|
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
|
||||||
|
|
Loading…
Reference in New Issue