KeyHandler & Megrez // Updates to Megrez v1.1.2.
This commit is contained in:
parent
af2bdc4343
commit
494e9cf637
|
@ -72,7 +72,7 @@ class KeyHandler {
|
|||
}
|
||||
|
||||
public init() {
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel, separator: "-")
|
||||
ensureParser()
|
||||
setInputMode(ctlInputMethod.currentInputMode)
|
||||
}
|
||||
|
@ -118,9 +118,7 @@ class KeyHandler {
|
|||
// of the best possible Mandarin characters given the input syllables,
|
||||
// using the Viterbi algorithm implemented in the Megrez library.
|
||||
// The walk() traces the grid to the end, hence no need to use .reversed() here.
|
||||
_walkedNodes = Megrez.Walker(
|
||||
grid: _builder.grid()
|
||||
).walk(at: _builder.grid().width(), nodesLimit: 3, balanced: true)
|
||||
_walkedNodes = _builder.walk(at: _builder.grid.width, nodesLimit: 3, balanced: true)
|
||||
}
|
||||
|
||||
func popOverflowComposingTextAndWalk() -> String {
|
||||
|
@ -133,11 +131,11 @@ class KeyHandler {
|
|||
// (i.e. popped out.)
|
||||
|
||||
var poppedText = ""
|
||||
if _builder.grid().width() > mgrPrefs.composingBufferSize {
|
||||
if _builder.grid.width > mgrPrefs.composingBufferSize {
|
||||
if _walkedNodes.count > 0 {
|
||||
let anchor: Megrez.NodeAnchor = _walkedNodes[0]
|
||||
if let theNode = anchor.node {
|
||||
poppedText = theNode.currentKeyValue().value
|
||||
poppedText = theNode.currentKeyValue.value
|
||||
}
|
||||
_builder.removeHeadReadings(count: anchor.spanningLength)
|
||||
}
|
||||
|
@ -156,7 +154,7 @@ class KeyHandler {
|
|||
|
||||
func fixNode(value: String) {
|
||||
let cursorIndex: Int = getActualCandidateCursorIndex()
|
||||
let selectedNode: Megrez.NodeAnchor = _builder.grid().fixNodeSelectedCandidate(
|
||||
let selectedNode: Megrez.NodeAnchor = _builder.grid.fixNodeSelectedCandidate(
|
||||
location: cursorIndex, value: value
|
||||
)
|
||||
// 不要針對逐字選字模式啟用臨時半衰記憶模型。
|
||||
|
@ -216,7 +214,7 @@ class KeyHandler {
|
|||
// then use the Swift trick to retrieve the candidates for each node at/crossing the cursor
|
||||
for currentNodeAnchor in arrNodes {
|
||||
if let currentNode = currentNodeAnchor.node {
|
||||
for currentCandidate in currentNode.candidates() {
|
||||
for currentCandidate in currentNode.candidates {
|
||||
arrCandidates.append(currentCandidate.value)
|
||||
}
|
||||
}
|
||||
|
@ -237,7 +235,7 @@ class KeyHandler {
|
|||
if !overrideValue.isEmpty {
|
||||
IME.prtDebugIntel(
|
||||
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
|
||||
_builder.grid().overrideNodeScoreForSelectedCandidate(
|
||||
_builder.grid.overrideNodeScoreForSelectedCandidate(
|
||||
location: getActualCandidateCursorIndex(),
|
||||
value: overrideValue,
|
||||
overridingScore: findHighestScore(nodes: getRawNodes(), epsilon: kEpsilon)
|
||||
|
@ -251,7 +249,7 @@ class KeyHandler {
|
|||
var highestScore: Double = 0
|
||||
for currentAnchor in nodes {
|
||||
if let theNode = currentAnchor.node {
|
||||
let score = theNode.highestUnigramScore()
|
||||
let score = theNode.highestUnigramScore
|
||||
if score > highestScore {
|
||||
highestScore = score
|
||||
}
|
||||
|
@ -262,15 +260,15 @@ class KeyHandler {
|
|||
|
||||
// MARK: - Extracted methods and functions (Megrez).
|
||||
|
||||
func isBuilderEmpty() -> Bool { _builder.grid().width() == 0 }
|
||||
func isBuilderEmpty() -> Bool { _builder.grid.width == 0 }
|
||||
|
||||
func getRawNodes() -> [Megrez.NodeAnchor] {
|
||||
/// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。
|
||||
/// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的,但目前 Megrez 暫時缺乏對該特性的支援。
|
||||
/// 所以暫時只能將威注音的游標後置風格描述成「跟 Windows 版雅虎奇摩注音一致」。
|
||||
mgrPrefs.setRearCursorMode
|
||||
? _builder.grid().nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex())
|
||||
: _builder.grid().nodesEndingAt(location: getActualCandidateCursorIndex())
|
||||
? _builder.grid.nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex())
|
||||
: _builder.grid.nodesEndingAt(location: getActualCandidateCursorIndex())
|
||||
}
|
||||
|
||||
func setInputModesToLM(isCHS: Bool) {
|
||||
|
@ -285,12 +283,11 @@ class KeyHandler {
|
|||
}
|
||||
|
||||
func createNewBuilder() {
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
|
||||
// Each Mandarin syllable is separated by a hyphen.
|
||||
_builder.setJoinSeparator(separator: "-")
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel, separator: "-")
|
||||
}
|
||||
|
||||
func currentReadings() -> [String] { _builder.readings() }
|
||||
func currentReadings() -> [String] { _builder.readings }
|
||||
|
||||
func ifLangModelHasUnigrams(forKey reading: String) -> Bool {
|
||||
_languageModel.hasUnigramsFor(key: reading)
|
||||
|
@ -301,15 +298,15 @@ class KeyHandler {
|
|||
}
|
||||
|
||||
func setBuilderCursorIndex(value: Int) {
|
||||
_builder.setCursorIndex(newIndex: value)
|
||||
_builder.cursorIndex = value
|
||||
}
|
||||
|
||||
func getBuilderCursorIndex() -> Int {
|
||||
_builder.cursorIndex()
|
||||
_builder.cursorIndex
|
||||
}
|
||||
|
||||
func getBuilderLength() -> Int {
|
||||
_builder.length()
|
||||
_builder.length
|
||||
}
|
||||
|
||||
func deleteBuilderReadingInFrontOfCursor() {
|
||||
|
@ -321,7 +318,7 @@ class KeyHandler {
|
|||
}
|
||||
|
||||
func getKeyLengthAtIndexZero() -> Int {
|
||||
_walkedNodes[0].node?.currentKeyValue().value.count ?? 0
|
||||
_walkedNodes[0].node?.currentKeyValue.value.count ?? 0
|
||||
}
|
||||
|
||||
// MARK: - Extracted methods and functions (Tekkon).
|
||||
|
|
|
@ -45,7 +45,7 @@ extension KeyHandler {
|
|||
continue
|
||||
}
|
||||
|
||||
let valueString = node.currentKeyValue().value
|
||||
let valueString = node.currentKeyValue.value
|
||||
composingBuffer += valueString
|
||||
let codepointCount = valueString.count
|
||||
|
||||
|
@ -303,7 +303,7 @@ extension KeyHandler {
|
|||
|
||||
for theAnchor in _walkedNodes {
|
||||
if let node = theAnchor.node {
|
||||
var key = node.currentKeyValue().key
|
||||
var key = node.currentKeyValue.key
|
||||
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
|
||||
key = restoreToneOneInZhuyinKey(target: key) // 恢復陰平標記
|
||||
key = Tekkon.cnvPhonaToHanyuPinyin(target: key) // 注音轉拼音
|
||||
|
@ -313,7 +313,7 @@ extension KeyHandler {
|
|||
key = cnvZhuyinKeyToTextbookReading(target: key, newSeparator: " ")
|
||||
}
|
||||
|
||||
let value = node.currentKeyValue().value
|
||||
let value = node.currentKeyValue.value
|
||||
if key.contains("_") { // 不要給標點符號等特殊元素加注音
|
||||
composed += value
|
||||
} else {
|
||||
|
|
|
@ -186,7 +186,7 @@ extension vChewing {
|
|||
var strPrevious = "()"
|
||||
var strAnterior = "()"
|
||||
|
||||
guard let kvCurrent = arrNodesReversed[0].node?.currentKeyValue(),
|
||||
guard let kvCurrent = arrNodesReversed[0].node?.currentKeyValue,
|
||||
!arrEndingPunctuation.contains(kvCurrent.value)
|
||||
else {
|
||||
return ""
|
||||
|
@ -196,14 +196,14 @@ extension vChewing {
|
|||
strCurrent = kvCurrent.key
|
||||
|
||||
if arrNodesReversed.count >= 2,
|
||||
let kvPrevious = arrNodesReversed[1].node?.currentKeyValue(),
|
||||
let kvPrevious = arrNodesReversed[1].node?.currentKeyValue,
|
||||
!arrEndingPunctuation.contains(kvPrevious.value)
|
||||
{
|
||||
strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))"
|
||||
}
|
||||
|
||||
if arrNodesReversed.count >= 3,
|
||||
let kvAnterior = arrNodesReversed[2].node?.currentKeyValue(),
|
||||
let kvAnterior = arrNodesReversed[2].node?.currentKeyValue,
|
||||
!arrEndingPunctuation.contains(kvAnterior.value)
|
||||
{
|
||||
strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))"
|
||||
|
|
|
@ -24,33 +24,55 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
/// 分節讀音槽。
|
||||
public class BlockReadingBuilder {
|
||||
var mutMaximumBuildSpanLength = 10
|
||||
var mutCursorIndex: Int = 0
|
||||
var mutReadings: [String] = []
|
||||
var mutGrid: Grid = .init()
|
||||
var mutLM: LanguageModel
|
||||
var mutJoinSeparator: String = ""
|
||||
/// 該分節讀音槽內可以允許的最大詞長。
|
||||
private var mutMaximumBuildSpanLength = 10
|
||||
/// 該分節讀音槽的游標位置。
|
||||
private var mutCursorIndex: Int = 0
|
||||
/// 該分節讀音槽的讀音陣列。
|
||||
private var mutReadings: [String] = []
|
||||
/// 該分節讀音槽的軌格。
|
||||
private var mutGrid: Grid = .init()
|
||||
/// 該分節讀音槽所使用的語言模型。
|
||||
private var mutLM: LanguageModel
|
||||
|
||||
public init(lm: LanguageModel, length: Int = 10) {
|
||||
mutLM = lm
|
||||
mutMaximumBuildSpanLength = length
|
||||
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
||||
public var joinSeparator: String = ""
|
||||
/// 公開:該分節讀音槽的游標位置。
|
||||
public var cursorIndex: Int {
|
||||
get { mutCursorIndex }
|
||||
set { mutCursorIndex = (newValue < 0) ? 0 : min(newValue, mutReadings.count) }
|
||||
}
|
||||
|
||||
/// 公開:該分節讀音槽的軌格(唯讀)。
|
||||
public var grid: Grid { mutGrid }
|
||||
/// 公開:該分節讀音槽的長度,也就是內建漢字讀音的數量(唯讀)。
|
||||
public var length: Int { mutReadings.count }
|
||||
/// 公開:該分節讀音槽的讀音陣列(唯讀)。
|
||||
public var readings: [String] { mutReadings }
|
||||
|
||||
/// 分節讀音槽。
|
||||
/// - Parameters:
|
||||
/// - lm: 語言模型。可以是任何基於 Megrez.LanguageModel 的衍生型別。
|
||||
/// - length: 指定該分節讀音槽內可以允許的最大詞長,預設為 10 字。
|
||||
/// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為空。
|
||||
public init(lm: LanguageModel, length: Int = 10, separator: String = "") {
|
||||
mutLM = lm
|
||||
mutMaximumBuildSpanLength = length
|
||||
joinSeparator = separator
|
||||
}
|
||||
|
||||
/// 分節讀音槽自我清空專用函數。
|
||||
public func clear() {
|
||||
mutCursorIndex = 0
|
||||
mutReadings.removeAll()
|
||||
mutGrid.clear()
|
||||
}
|
||||
|
||||
public func length() -> Int { mutReadings.count }
|
||||
|
||||
public func cursorIndex() -> Int { mutCursorIndex }
|
||||
|
||||
public func setCursorIndex(newIndex: Int) {
|
||||
mutCursorIndex = min(newIndex, mutReadings.count)
|
||||
}
|
||||
|
||||
/// 在游標位置插入給定的讀音。
|
||||
/// - Parameters:
|
||||
/// - reading: 要插入的讀音。
|
||||
public func insertReadingAtCursor(reading: String) {
|
||||
mutReadings.insert(reading, at: mutCursorIndex)
|
||||
mutGrid.expandGridByOneAt(location: mutCursorIndex)
|
||||
|
@ -58,8 +80,8 @@ extension Megrez {
|
|||
mutCursorIndex += 1
|
||||
}
|
||||
|
||||
public func readings() -> [String] { mutReadings }
|
||||
|
||||
/// 朝著與文字輸入方向相反的方向、砍掉一個與游標相鄰的讀音。
|
||||
/// 在威注音的術語體系當中,「與文字輸入方向相反的方向」為向後(Rear)。
|
||||
@discardableResult public func deleteReadingAtTheRearOfCursor() -> Bool {
|
||||
if mutCursorIndex == 0 {
|
||||
return false
|
||||
|
@ -72,6 +94,8 @@ extension Megrez {
|
|||
return true
|
||||
}
|
||||
|
||||
/// 朝著往文字輸入方向、砍掉一個與游標相鄰的讀音。
|
||||
/// 在威注音的術語體系當中,「文字輸入方向」為向前(Front)。
|
||||
@discardableResult public func deleteReadingToTheFrontOfCursor() -> Bool {
|
||||
if mutCursorIndex == mutReadings.count {
|
||||
return false
|
||||
|
@ -83,8 +107,12 @@ extension Megrez {
|
|||
return true
|
||||
}
|
||||
|
||||
/// 移除該分節讀音槽的第一個讀音單元。
|
||||
///
|
||||
/// 用於輸入法組字區長度上限處理:
|
||||
/// 將該位置要溢出的敲字內容遞交之後、再執行這個函數。
|
||||
@discardableResult public func removeHeadReadings(count: Int) -> Bool {
|
||||
if count > length() {
|
||||
if count > length {
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -100,17 +128,108 @@ extension Megrez {
|
|||
return true
|
||||
}
|
||||
|
||||
public func setJoinSeparator(separator: String) {
|
||||
mutJoinSeparator = separator
|
||||
// MARK: - Walker
|
||||
|
||||
/// Walks the grid forward from the given location under the given conditions.
///
/// This is a convenience over `reverseWalk`: it runs the reverse walk and flips the
/// order of the result so that callers do not have to reverse it themselves.
/// - Parameters:
///   - location: The position at which the walk starts.
///   - accumulatedScore: The accumulated score to start from. Optional; defaults to 0.
///   - nodesLimit: Caps how many nodes are walked.
///   - balanced: Enables balanced weighting, which adds a span-length-based weight
///     on top of each node's own score.
/// - Returns: The anchors produced by `reverseWalk`, in reversed order.
public func walk(
  at location: Int,
  score accumulatedScore: Double = 0.0,
  nodesLimit: Int = 0,
  balanced: Bool = false
) -> [NodeAnchor] {
  let backwardPath = reverseWalk(
    at: location,
    score: accumulatedScore,
    nodesLimit: nodesLimit,
    balanced: balanced
  )
  return Array(backwardPath.reversed())
}
|
||||
|
||||
public func joinSeparator() -> String { mutJoinSeparator }
|
||||
/// 對已給定的軌格按照給定的位置與條件進行反向爬軌。
|
||||
/// - Parameters:
|
||||
/// - at: 開始爬軌的位置。
|
||||
/// - score: 給定累計權重,非必填參數。預設值為 0。
|
||||
/// - nodesLimit: 限定最多只爬多少個節點。
|
||||
/// - balanced: 啟用平衡權重,在節點權重的基礎上根據節點幅位長度來加權。
|
||||
public func reverseWalk(
|
||||
at location: Int,
|
||||
score accumulatedScore: Double = 0.0,
|
||||
nodesLimit: Int = 0,
|
||||
balanced: Bool = false
|
||||
) -> [NodeAnchor] {
|
||||
if location == 0 || location > mutGrid.width {
|
||||
return [] as [NodeAnchor]
|
||||
}
|
||||
|
||||
public func grid() -> Grid { mutGrid }
|
||||
var paths: [[NodeAnchor]] = []
|
||||
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)
|
||||
|
||||
public func build() {
|
||||
// if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。
|
||||
if balanced {
|
||||
nodes.sort {
|
||||
$0.balancedScore > $1.balancedScore
|
||||
}
|
||||
}
|
||||
|
||||
for (i, n) in nodes.enumerated() {
|
||||
// 只檢查前 X 個 NodeAnchor 是否有 node。
|
||||
// 這裡有 abs 是為了防止有白癡填負數。
|
||||
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
|
||||
break
|
||||
}
|
||||
|
||||
var n = n
|
||||
guard let nNode = n.node else {
|
||||
continue
|
||||
}
|
||||
|
||||
n.accumulatedScore = accumulatedScore + nNode.score
|
||||
|
||||
// 利用幅位長度來決定權重。
|
||||
// 這樣一來,例:「再見」比「在」與「見」的權重更高。
|
||||
if balanced {
|
||||
let weightedScore: Double = (Double(n.spanningLength) - 1) * 2
|
||||
n.accumulatedScore += weightedScore
|
||||
}
|
||||
|
||||
var path: [NodeAnchor] = reverseWalk(
|
||||
at: location - n.spanningLength,
|
||||
score: n.accumulatedScore
|
||||
)
|
||||
|
||||
path.insert(n, at: 0)
|
||||
|
||||
paths.append(path)
|
||||
|
||||
// 始終使用固定的候選字詞
|
||||
if balanced, nNode.score >= 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !paths.isEmpty {
|
||||
if var result = paths.first {
|
||||
for value in paths {
|
||||
if let vLast = value.last, let rLast = result.last {
|
||||
if vLast.accumulatedScore > rLast.accumulatedScore {
|
||||
result = value
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
return [] as [NodeAnchor]
|
||||
}
|
||||
|
||||
// MARK: - Private functions
|
||||
|
||||
private func build() {
|
||||
let itrBegin: Int =
|
||||
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
|
||||
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
|
||||
|
@ -121,7 +240,7 @@ extension Megrez {
|
|||
break
|
||||
}
|
||||
let strSlice = mutReadings[p..<(p + q)]
|
||||
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
|
||||
let combinedReading: String = join(slice: strSlice, separator: joinSeparator)
|
||||
|
||||
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
|
||||
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
|
||||
|
@ -134,7 +253,7 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
public func join(slice strSlice: ArraySlice<String>, separator: String) -> String {
|
||||
private func join(slice strSlice: ArraySlice<String>, separator: String) -> String {
|
||||
var arrResult: [String] = []
|
||||
for value in strSlice {
|
||||
arrResult.append(value)
|
||||
|
|
|
@ -1,123 +0,0 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
|
||||
/// Walks a `Megrez.Grid` and returns the best-scoring chain of node anchors.
public class Walker {
  /// The grid this walker traverses.
  var mutGrid: Grid

  /// Creates a walker.
  /// - Parameter grid: The grid to walk. Defaults to a newly created grid.
  public init(grid: Megrez.Grid = Megrez.Grid()) {
    mutGrid = grid
  }

  /// Walks the grid and returns the anchors in the reverse of `reverseWalk`'s order.
  /// - Parameters:
  ///   - location: The position at which the walk starts.
  ///   - accumulatedScore: The accumulated score to start from. Defaults to 0.
  ///   - nodesLimit: Caps how many candidate nodes are examined; 0 means no cap.
  ///   - balanced: Enables span-length-based weighting on top of node scores.
  /// - Returns: The result of `reverseWalk`, reversed.
  public func walk(
    at location: Int,
    score accumulatedScore: Double = 0.0,
    nodesLimit: Int = 0,
    balanced: Bool = false
  ) -> [NodeAnchor] {
    let backwardPath = reverseWalk(
      at: location,
      score: accumulatedScore,
      nodesLimit: nodesLimit,
      balanced: balanced
    )
    return Array(backwardPath.reversed())
  }

  /// Walks the grid backwards from `location`, recursing towards position 0.
  ///
  /// For every candidate anchor ending at `location`, the walk recurses from the
  /// position where that anchor begins and prepends the anchor to the path that
  /// comes back. Among all collected paths, the one whose last anchor carries the
  /// largest accumulated score is returned.
  /// - Parameters:
  ///   - location: The position at which the walk starts.
  ///   - accumulatedScore: The accumulated score to start from. Defaults to 0.
  ///   - nodesLimit: When non-zero, the candidate loop stops once it reaches index
  ///     `abs(nodesLimit) - 1`. Not forwarded to the recursive calls.
  ///   - balanced: Sorts candidates by `balancedScore` (descending), adds
  ///     `(spanningLength - 1) * 2` to each accumulated score, and stops after the
  ///     first candidate whose node score is non-negative. Not forwarded to the
  ///     recursive calls.
  /// - Returns: The chosen path, or an empty array when `location` is 0, lies beyond
  ///   the grid width, or no candidate carries a node.
  public func reverseWalk(
    at location: Int,
    score accumulatedScore: Double = 0.0,
    nodesLimit: Int = 0,
    balanced: Bool = false
  ) -> [NodeAnchor] {
    guard location != 0, location <= mutGrid.width() else {
      return []
    }

    var collectedPaths: [[NodeAnchor]] = []
    var candidates: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)

    if balanced {
      candidates.sort { lhs, rhs in lhs.balancedScore > rhs.balancedScore }
    }

    for (index, candidate) in candidates.enumerated() {
      // Only the leading candidates are examined when a limit is given.
      // abs() guards against a negative limit being passed in.
      if abs(nodesLimit) > 0, index == abs(nodesLimit) - 1 {
        break
      }

      guard let candidateNode = candidate.node else {
        continue
      }

      var anchor = candidate
      anchor.accumulatedScore = accumulatedScore + candidateNode.score()

      // Weight by spanning length so that a longer match outranks the
      // equivalent sequence of shorter ones.
      if balanced {
        anchor.accumulatedScore += (Double(anchor.spanningLength) - 1) * 2
      }

      var path: [NodeAnchor] = reverseWalk(
        at: location - anchor.spanningLength,
        score: anchor.accumulatedScore
      )
      path.insert(anchor, at: 0)
      collectedPaths.append(path)

      // Always stick with a fixed candidate (non-negative score) once found.
      if balanced, candidateNode.score() >= 0 {
        break
      }
    }

    guard var best = collectedPaths.first else {
      return []
    }
    for path in collectedPaths {
      guard let tail = path.last, let bestTail = best.last else {
        continue
      }
      if tail.accumulatedScore > bestTail.accumulatedScore {
        best = path
      }
    }
    return best
  }
}
|
||||
}
|
|
@ -24,17 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
/// 軌格。
|
||||
public class Grid {
|
||||
var mutSpans: [Megrez.Span]
|
||||
/// 幅位陣列。
|
||||
private var mutSpans: [Megrez.Span]
|
||||
|
||||
/// 軌格的寬度,也就是其內的幅位陣列當中的幅位數量。
|
||||
var width: Int { mutSpans.count }
|
||||
|
||||
public init() {
|
||||
mutSpans = [Megrez.Span]()
|
||||
}
|
||||
|
||||
/// 自我清空該軌格的內容。
|
||||
public func clear() {
|
||||
mutSpans = [Megrez.Span]()
|
||||
}
|
||||
|
||||
/// 往該軌格的指定位置插入指定幅位長度的指定節點。
|
||||
/// - Parameters:
|
||||
/// - node: 節點。
|
||||
/// - location: 位置。
|
||||
/// - spanningLength: 給定的幅位長度。
|
||||
public func insertNode(node: Node, location: Int, spanningLength: Int) {
|
||||
if location >= mutSpans.count {
|
||||
let diff = location - mutSpans.count + 1
|
||||
|
@ -45,15 +56,23 @@ extension Megrez {
|
|||
mutSpans[location].insert(node: node, length: spanningLength)
|
||||
}
|
||||
|
||||
/// 給定索引鍵、位置、幅位長度,在該軌格內確認是否有對應的節點存在。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
/// - spanningLength: 給定的幅位長度。
|
||||
/// - key: 索引鍵。
|
||||
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
|
||||
if location > mutSpans.count {
|
||||
return false
|
||||
}
|
||||
|
||||
let n = mutSpans[location].node(length: spanningLength)
|
||||
return n == nil ? false : key == n?.key()
|
||||
return n == nil ? false : key == n?.key
|
||||
}
|
||||
|
||||
/// 在該軌格的指定位置擴增一個幅位。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
public func expandGridByOneAt(location: Int) {
|
||||
// 這裡加入 abs 完全是一個防呆設計
|
||||
mutSpans.insert(Span(), at: abs(location))
|
||||
|
@ -65,6 +84,9 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
/// 在該軌格的指定位置減少一個幅位。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
public func shrinkGridByOneAt(location: Int) {
|
||||
if location >= mutSpans.count {
|
||||
return
|
||||
|
@ -77,8 +99,9 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
public func width() -> Int { mutSpans.count }
|
||||
|
||||
/// 給定位置,枚舉出所有在這個位置結尾的節點。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
||||
var results: [NodeAnchor] = []
|
||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||
|
@ -100,6 +123,9 @@ extension Megrez {
|
|||
return results
|
||||
}
|
||||
|
||||
/// 給定位置,枚舉出所有在這個位置結尾、或者橫跨該位置的節點。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
||||
var results: [NodeAnchor] = []
|
||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||
|
@ -126,14 +152,18 @@ extension Megrez {
|
|||
return results
|
||||
}
|
||||
|
||||
public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
|
||||
/// 將給定位置的節點的候選字詞改為與給定的字串一致的候選字詞。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
/// - value: 給定字串。
|
||||
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
|
||||
var node = NodeAnchor()
|
||||
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
|
||||
guard let theNode = nodeAnchor.node else {
|
||||
continue
|
||||
}
|
||||
let candidates = theNode.candidates()
|
||||
// Reset the candidate-fixed state of every node at the location.
|
||||
let candidates = theNode.candidates
|
||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||
theNode.resetCandidate()
|
||||
for (i, candidate) in candidates.enumerated() {
|
||||
if candidate.value == value {
|
||||
|
@ -146,13 +176,18 @@ extension Megrez {
|
|||
return node
|
||||
}
|
||||
|
||||
/// 將給定位置的節點的與給定的字串一致的候選字詞的權重複寫為給定權重數值。
|
||||
/// - Parameters:
|
||||
/// - location: 位置。
|
||||
/// - value: 給定字串。
|
||||
/// - overridingScore: 給定權重數值。
|
||||
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
|
||||
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
|
||||
guard let theNode = nodeAnchor.node else {
|
||||
continue
|
||||
}
|
||||
let candidates = theNode.candidates()
|
||||
// Reset the candidate-fixed state of every node at the location.
|
||||
let candidates = theNode.candidates
|
||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||
theNode.resetCandidate()
|
||||
for (i, candidate) in candidates.enumerated() {
|
||||
if candidate.value == value {
|
||||
|
@ -164,3 +199,38 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - DumpDOT-related functions.
|
||||
|
||||
extension Megrez.Grid {
  /// A Graphviz DOT (`digraph`) rendering of this grid, laid out left to right.
  ///
  /// Each node found in a span is emitted under the value of its currently
  /// selected key-value pair. Nodes in the first span get an edge from `BOS`, nodes
  /// whose span ends exactly at the grid's end get an edge to `EOS`, and every other
  /// node gets an edge to each node in the span located where it ends.
  /// (`BOS` / `EOS` presumably mean begin / end of sentence; confirm.)
  public var dumpDOT: String {
    var output = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"

    for (spanIndex, span) in mutSpans.enumerated() {
      for length in 0...(span.maximumLength) {
        guard let node: Megrez.Node = span.node(length: length) else {
          continue
        }
        let label = node.currentKeyValue.value

        if spanIndex == 0 {
          output += "BOS -> \(label);\n"
        }
        output += "\(label);\n"

        // Index of the span located right after this node's coverage.
        let followingIndex = spanIndex + length

        if followingIndex < mutSpans.count {
          let followingSpan = mutSpans[followingIndex]
          for followingLength in 0...(followingSpan.maximumLength) {
            guard let successor = followingSpan.node(length: followingLength) else {
              continue
            }
            output += "\(label) -> \(successor.currentKeyValue.value);\n"
          }
        } else if followingIndex == mutSpans.count {
          output += "\(label) -> EOS;\n"
        }
      }
    }

    output += "EOS;\n}\n"
    return output
  }
}
|
||||
|
|
|
@ -24,19 +24,52 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
@frozen public struct NodeAnchor {
|
||||
/// 節锚。
|
||||
@frozen public struct NodeAnchor: CustomStringConvertible {
|
||||
/// 節點。一個節锚內不一定有節點。
|
||||
public var node: Node?
|
||||
/// 節锚所在的位置。
|
||||
public var location: Int = 0
|
||||
/// 幅位長度。
|
||||
public var spanningLength: Int = 0
|
||||
/// 累計權重。
|
||||
public var accumulatedScore: Double = 0.0
|
||||
/// 索引鍵的長度。
|
||||
public var keyLength: Int {
|
||||
node?.key().count ?? 0
|
||||
node?.key.count ?? 0
|
||||
}
|
||||
|
||||
/// 將當前節锚列印成一個字串。
|
||||
public var description: String {
|
||||
var stream = ""
|
||||
stream += "{@(" + String(location) + "," + String(spanningLength) + "),"
|
||||
if let node = node {
|
||||
stream += node.description
|
||||
} else {
|
||||
stream += "null"
|
||||
}
|
||||
stream += "}"
|
||||
return stream
|
||||
}
|
||||
|
||||
/// 獲取平衡權重。
|
||||
public var balancedScore: Double {
|
||||
let weightedScore: Double = (Double(spanningLength) - 1) * 2
|
||||
let nodeScore: Double = node?.score() ?? 0
|
||||
let nodeScore: Double = node?.score ?? 0
|
||||
return weightedScore + nodeScore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - DumpDOT-related functions.
|
||||
|
||||
extension Array where Element == Megrez.NodeAnchor {
  /// Renders the anchor array as a single string.
  ///
  /// Every anchor's `description` is prefixed with `<-`, because the list being
  /// joined starts with an empty string. An empty array yields an empty string.
  public var description: String {
    let pieces: [String] = [""] + map { $0.description }
    return pieces.joined(separator: "<-")
  }
}
|
||||
|
|
|
@ -24,23 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
/// 幅位。
|
||||
@frozen public struct Span {
|
||||
private var mutLengthNodeMap: [Int: Megrez.Node]
|
||||
private var mutMaximumLength: Int
|
||||
/// 辭典:以節點長度為索引,以節點為資料值。
|
||||
private var mutLengthNodeMap: [Int: Megrez.Node] = [:]
|
||||
/// 最大節點長度。
|
||||
private var mutMaximumLength: Int = 0
|
||||
|
||||
/// 公開:最長幅距(唯讀)。
|
||||
var maximumLength: Int {
|
||||
mutMaximumLength
|
||||
}
|
||||
|
||||
public init() {
|
||||
mutLengthNodeMap = [:]
|
||||
mutMaximumLength = 0
|
||||
}
|
||||
|
||||
/// 自我清空,各項參數歸零。
|
||||
mutating func clear() {
|
||||
mutLengthNodeMap.removeAll()
|
||||
mutMaximumLength = 0
|
||||
}
|
||||
|
||||
/// 往自身插入一個節點、及給定的節點長度。
|
||||
/// - Parameters:
|
||||
/// - node: 節點。
|
||||
/// - length: 給定的節點長度。
|
||||
mutating func insert(node: Node, length: Int) {
|
||||
mutLengthNodeMap[length] = node
|
||||
if length > mutMaximumLength {
|
||||
|
@ -48,6 +53,9 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
/// 移除任何比給定的長度更長的節點。
|
||||
/// - Parameters:
|
||||
/// - length: 給定的節點長度。
|
||||
mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
|
||||
if length > mutMaximumLength { return }
|
||||
var max = 0
|
||||
|
@ -67,6 +75,9 @@ extension Megrez {
|
|||
mutMaximumLength = max
|
||||
}
|
||||
|
||||
/// 給定節點長度,獲取節點。
|
||||
/// - Parameters:
|
||||
/// - length: 給定的節點長度。
|
||||
public func node(length: Int) -> Node? {
|
||||
mutLengthNodeMap[length]
|
||||
}
|
||||
|
|
|
@ -24,55 +24,69 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
public class Node {
|
||||
let mutLM: LanguageModel
|
||||
var mutKey: String
|
||||
var mutScore: Double = 0
|
||||
var mutUnigrams: [Unigram]
|
||||
var mutCandidates: [KeyValuePair]
|
||||
var mutValueUnigramIndexMap: [String: Int]
|
||||
var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]]
|
||||
|
||||
var mutCandidateFixed: Bool = false
|
||||
var mutSelectedUnigramIndex: Int = 0
|
||||
|
||||
let kSelectedCandidateScore: Double = 99
|
||||
|
||||
public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
|
||||
mutLM = LanguageModel()
|
||||
|
||||
mutKey = key
|
||||
mutScore = 0
|
||||
|
||||
mutUnigrams = unigrams
|
||||
mutCandidates = []
|
||||
mutValueUnigramIndexMap = [:]
|
||||
mutPrecedingBigramMap = [:]
|
||||
|
||||
mutCandidateFixed = false
|
||||
mutSelectedUnigramIndex = 0
|
||||
|
||||
if bigrams == [] {
|
||||
node(key: key, unigrams: unigrams, bigrams: bigrams)
|
||||
} else {
|
||||
node(key: key, unigrams: unigrams)
|
||||
}
|
||||
/// 節點。
|
||||
public class Node: CustomStringConvertible {
|
||||
/// 當前節點對應的語言模型。
|
||||
private let mutLM: LanguageModel = .init()
|
||||
/// 鍵。
|
||||
private var mutKey: String = ""
|
||||
/// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。
|
||||
private var mutScore: Double = 0
|
||||
/// 單元圖陣列。
|
||||
private var mutUnigrams: [Unigram]
|
||||
/// 雙元圖陣列。
|
||||
private var mutBigrams: [Bigram]
|
||||
/// 候選字詞陣列,以鍵值陣列的形式存在。
|
||||
private var mutCandidates: [KeyValuePair] = []
|
||||
/// 專門「用單元圖資料值來調查索引值」的辭典。
|
||||
private var mutValueUnigramIndexMap: [String: Int] = [:]
|
||||
/// 專門「用給定鍵值來取對應的雙元圖陣列」的辭典。
|
||||
private var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]] = [:]
|
||||
/// 狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態。
|
||||
private var mutCandidateFixed: Bool = false
|
||||
/// 用來登記「當前選中的單元圖」的索引值的變數。
|
||||
private var mutSelectedUnigramIndex: Int = 0
|
||||
/// 用來登記要施加給「『被標記為選中狀態』的候選字詞」的複寫權重的數值。
|
||||
private let kSelectedCandidateScore: Double = 99
|
||||
/// 將當前節點列印成一個字串。
|
||||
public var description: String {
|
||||
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
|
||||
}
|
||||
|
||||
public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
|
||||
var unigrams = unigrams
|
||||
/// 公開:候選字詞陣列(唯讀),以鍵值陣列的形式存在。
|
||||
var candidates: [KeyValuePair] { mutCandidates }
|
||||
/// 公開:狀態標記變數,用來記載當前節點是否處於候選字詞鎖定狀態(唯讀)。
|
||||
var isCandidateFixed: Bool { mutCandidateFixed }
|
||||
|
||||
/// 公開:鍵(唯讀)。
|
||||
var key: String { mutKey }
|
||||
/// 公開:當前節點的當前被選中的候選字詞「在該節點內的」目前的權重(唯讀)。
|
||||
var score: Double { mutScore }
|
||||
/// 公開:當前被選中的候選字詞的鍵值配對。
|
||||
var currentKeyValue: KeyValuePair {
|
||||
mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex]
|
||||
}
|
||||
|
||||
/// 公開:給出當前單元圖陣列內最高的權重數值。
|
||||
var highestUnigramScore: Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score }
|
||||
|
||||
/// 初期化一個節點。
|
||||
/// - Parameters:
|
||||
/// - key: 索引鍵。
|
||||
/// - unigrams: 單元圖陣列。
|
||||
/// - bigrams: 雙元圖陣列(非必填)。
|
||||
public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
|
||||
mutKey = key
|
||||
unigrams.sort {
|
||||
mutUnigrams = unigrams
|
||||
mutBigrams = bigrams
|
||||
|
||||
mutUnigrams.sort {
|
||||
$0.score > $1.score
|
||||
}
|
||||
|
||||
if !mutUnigrams.isEmpty {
|
||||
mutScore = mutUnigrams[0].score
|
||||
}
|
||||
|
||||
for (i, theGram) in unigrams.enumerated() {
|
||||
mutValueUnigramIndexMap[theGram.keyValue.value] = i
|
||||
mutCandidates.append(theGram.keyValue)
|
||||
for (i, gram) in mutUnigrams.enumerated() {
|
||||
mutValueUnigramIndexMap[gram.keyValue.value] = i
|
||||
mutCandidates.append(gram.keyValue)
|
||||
}
|
||||
|
||||
for gram in bigrams {
|
||||
|
@ -80,11 +94,14 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
/// 對擁有「給定的前述鍵值陣列」的節點提權。
|
||||
/// - Parameters:
|
||||
/// - precedingKeyValues: 前述鍵值陣列。
|
||||
public func primeNodeWith(precedingKeyValues: [KeyValuePair]) {
|
||||
var newIndex = mutSelectedUnigramIndex
|
||||
var max = mutScore
|
||||
|
||||
if !isCandidateFixed() {
|
||||
if !isCandidateFixed {
|
||||
for neta in precedingKeyValues {
|
||||
let bigrams = mutPrecedingBigramMap[neta] ?? []
|
||||
for bigram in bigrams {
|
||||
|
@ -107,16 +124,17 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
public func isCandidateFixed() -> Bool { mutCandidateFixed }
|
||||
|
||||
public func candidates() -> [KeyValuePair] { mutCandidates }
|
||||
|
||||
/// Selects the candidate at the given index.
/// - Parameters:
///   - index: Position of the candidate. Falls back to 0 when it lies outside the unigram array.
///   - fix: Whether to mark this node as being in the "candidate fixed" state.
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
  // `indices.contains` rejects negative values as well as values past the end;
  // the former `index >= count` test let a negative index through.
  mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
  mutCandidateFixed = fix
  // A selected candidate always receives the fixed override score.
  mutScore = kSelectedCandidateScore
}
|
||||
|
||||
/// 重設該節點的候選字詞狀態。
|
||||
public func resetCandidate() {
|
||||
mutSelectedUnigramIndex = 0
|
||||
mutCandidateFixed = false
|
||||
|
@ -125,16 +143,19 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
/// Selects the candidate at the given index and applies the given score, without fixing the node.
/// - Parameters:
///   - index: Position of the candidate. Falls back to 0 when it lies outside the unigram array.
///   - score: The score to assign to this node.
public func selectFloatingCandidateAt(index: Int, score: Double) {
  // `indices.contains` rejects negative values as well as values past the end;
  // the former `index >= count` test let a negative index through.
  mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
  mutCandidateFixed = false
  mutScore = score
}
|
||||
|
||||
public func key() -> String { mutKey }
|
||||
|
||||
public func score() -> Double { mutScore }
|
||||
|
||||
/// 藉由給定的候選字詞字串,找出在庫的單元圖權重數值。沒有的話就找零。
|
||||
/// - Parameters:
|
||||
/// - candidate: 給定的候選字詞字串。
|
||||
public func scoreFor(candidate: String) -> Double {
|
||||
for unigram in mutUnigrams {
|
||||
if unigram.keyValue.value == candidate {
|
||||
|
@ -144,14 +165,6 @@ extension Megrez {
|
|||
return 0.0
|
||||
}
|
||||
|
||||
public func currentKeyValue() -> KeyValuePair {
|
||||
mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex]
|
||||
}
|
||||
|
||||
public func highestUnigramScore() -> Double {
|
||||
mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score
|
||||
}
|
||||
|
||||
public static func == (lhs: Node, rhs: Node) -> Bool {
|
||||
lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates
|
||||
&& lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap
|
||||
|
|
|
@ -24,19 +24,23 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
// 這裡充其量只是框架,回頭實際使用時需要派生一個型別、且重寫相關函數。
|
||||
// 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函數的參數設計。
|
||||
/// 語言模型框架,回頭實際使用時需要派生一個型別、且重寫相關函數。
|
||||
open class LanguageModel {
|
||||
public init() {}
|
||||
|
||||
// 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函數的參數設計。
|
||||
|
||||
/// Given a key, asks the language model for unigrams.
///
/// This base implementation returns an empty array for every key.
open func unigramsFor(key: String) -> [Megrez.Unigram] {
  // Placeholder body: both branches yield the same empty array. `key` is read
  // only so that the parameter stays referenced inside the function.
  let noUnigrams: [Megrez.Unigram] = []
  return key.isEmpty ? noUnigrams : noUnigrams
}
|
||||
|
||||
/// Given the current key and the preceding key, asks the language model for bigrams.
///
/// This base implementation returns an empty array for every pair of keys.
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
  // Placeholder body: both branches yield the same empty array. The parameters
  // are compared only so that they stay referenced inside the function.
  let noBigrams: [Megrez.Bigram] = []
  return precedingKey == key ? noBigrams : noBigrams
}
|
||||
|
||||
/// Given a key, reports whether unigrams exist for it.
///
/// This base implementation answers `true` for any non-empty key.
open func hasUnigramsFor(key: String) -> Bool {
  // `isEmpty` avoids walking the whole string just to compare its length with zero.
  !key.isEmpty
}
|
||||
|
|
|
@ -24,17 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
@frozen public struct Bigram: Equatable {
|
||||
/// 雙元圖。
|
||||
@frozen public struct Bigram: Equatable, CustomStringConvertible {
|
||||
/// 當前鍵值。
|
||||
public var keyValue: KeyValuePair
|
||||
/// 前述鍵值。
|
||||
public var precedingKeyValue: KeyValuePair
|
||||
/// 權重。
|
||||
public var score: Double
|
||||
// var paired: String
|
||||
/// Renders this bigram as a string of the form `(keyValue|precedingKeyValue,score)`.
public var description: String {
  "(\(keyValue.description)|\(precedingKeyValue.description),\(String(score)))"
}
|
||||
|
||||
/// 初期化一筆「雙元圖」。一筆雙元圖由一組前述鍵值配對、一組當前鍵值配對、與一筆權重數值組成。
|
||||
/// - Parameters:
|
||||
/// - precedingKeyValue: 前述鍵值。
|
||||
/// - keyValue: 當前鍵值。
|
||||
/// - score: 權重(雙精度小數)。
|
||||
public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) {
|
||||
self.keyValue = keyValue
|
||||
self.precedingKeyValue = precedingKeyValue
|
||||
self.score = score
|
||||
// paired = "(" + keyValue.paired + "|" + precedingKeyValue.paired + "," + String(score) + ")"
|
||||
}
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
|
@ -44,16 +55,6 @@ extension Megrez {
|
|||
// hasher.combine(paired)
|
||||
}
|
||||
|
||||
// static func getPairedBigrams(grams: [Bigram]) -> String {
|
||||
// var arrOutputContent = [""]
|
||||
// var index = 0
|
||||
// for gram in grams {
|
||||
// arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
|
||||
// index += 1
|
||||
// }
|
||||
// return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
|
||||
// }
|
||||
|
||||
/// Two bigrams are equal when their preceding pair, current pair and score all match.
public static func == (lhs: Bigram, rhs: Bigram) -> Bool {
  guard lhs.precedingKeyValue == rhs.precedingKeyValue else { return false }
  guard lhs.keyValue == rhs.keyValue else { return false }
  return lhs.score == rhs.score
}
|
||||
|
@ -62,13 +63,18 @@ extension Megrez {
|
|||
lhs.precedingKeyValue < rhs.precedingKeyValue
|
||||
|| (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue))
|
||||
}
|
||||
|
||||
var description: String {
|
||||
"\(keyValue):\(score)"
|
||||
}
|
||||
|
||||
var debugDescription: String {
|
||||
"Bigram(keyValue: \(keyValue), score: \(score))"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - DumpDOT-related functions.
|
||||
|
||||
extension Array where Element == Megrez.Bigram {
  /// Renders the bigram array as one string: `[count]=>{,0=>…,1=>…}`.
  public var description: String {
    // The list is seeded with one empty string, so the joined text begins with a comma
    // whenever the array is non-empty. That output shape is kept as-is.
    let entries = [""] + enumerated().map { "\($0.offset)=>\($0.element.description)" }
    return "[\(count)]=>{\(entries.joined(separator: ","))}"
  }
}
|
||||
|
|
|
@ -24,21 +24,29 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
@frozen public struct Unigram: Equatable {
|
||||
/// 單元圖。
|
||||
@frozen public struct Unigram: Equatable, CustomStringConvertible {
|
||||
/// 鍵值。
|
||||
public var keyValue: KeyValuePair
|
||||
/// 權重。
|
||||
public var score: Double
|
||||
// var paired: String
|
||||
/// Renders this unigram as a string of the form `(keyValue,score)`.
public var description: String {
  "(\(keyValue.description),\(String(score)))"
}
|
||||
|
||||
/// 初期化一筆「單元圖」。一筆單元圖由一組鍵值配對與一筆權重數值組成。
|
||||
/// - Parameters:
|
||||
/// - keyValue: 鍵值。
|
||||
/// - score: 權重(雙精度小數)。
|
||||
public init(keyValue: KeyValuePair, score: Double) {
|
||||
self.keyValue = keyValue
|
||||
self.score = score
|
||||
// paired = "(" + keyValue.paired + "," + String(score) + ")"
|
||||
}
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(keyValue)
|
||||
hasher.combine(score)
|
||||
// hasher.combine(paired)
|
||||
}
|
||||
|
||||
// 這個函數不再需要了。
|
||||
|
@ -46,16 +54,6 @@ extension Megrez {
|
|||
a.score > b.score
|
||||
}
|
||||
|
||||
// static func getPairedUnigrams(grams: [Unigram]) -> String {
|
||||
// var arrOutputContent = [""]
|
||||
// var index = 0
|
||||
// for gram in grams {
|
||||
// arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
|
||||
// index += 1
|
||||
// }
|
||||
// return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
|
||||
// }
|
||||
|
||||
/// Two unigrams are equal when both their key-value pair and their score match.
public static func == (lhs: Unigram, rhs: Unigram) -> Bool {
  guard lhs.keyValue == rhs.keyValue else { return false }
  return lhs.score == rhs.score
}
|
||||
|
@ -63,13 +61,18 @@ extension Megrez {
|
|||
/// Orders two unigrams by key-value pair first, then by score when the pairs are equal.
public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
  // The tie-break used to re-test `lhs.keyValue < rhs.keyValue`. That operand is only
  // evaluated after the same test has already come out false, so the clause could never
  // be true. Comparing the scores gives the tie-break an effect.
  lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score)
}
|
||||
|
||||
var description: String {
|
||||
"\(keyValue):\(score)"
|
||||
}
|
||||
|
||||
var debugDescription: String {
|
||||
"Unigram(keyValue: \(keyValue), score: \(score))"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - DumpDOT-related functions.
|
||||
|
||||
extension Array where Element == Megrez.Unigram {
  /// Renders the unigram array as one string: `[count]=>{,0=>…,1=>…}`.
  public var description: String {
    // The list is seeded with one empty string, so the joined text begins with a comma
    // whenever the array is non-empty. That output shape is kept as-is.
    let entries = [""] + enumerated().map { "\($0.offset)=>\($0.element.description)" }
    return "[\(count)]=>{\(entries.joined(separator: ","))}"
  }
}
|
||||
|
|
|
@ -24,21 +24,29 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
*/
|
||||
|
||||
extension Megrez {
|
||||
@frozen public struct KeyValuePair: Equatable, Hashable, Comparable {
|
||||
/// 鍵值配對。
|
||||
@frozen public struct KeyValuePair: Equatable, Hashable, Comparable, CustomStringConvertible {
|
||||
/// 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。
|
||||
public var key: String
|
||||
/// 資料值。
|
||||
public var value: String
|
||||
// public var paired: String
|
||||
/// Renders this key-value pair as a string of the form `(key,value)`.
public var description: String {
  "(\(key),\(value))"
}
|
||||
|
||||
/// 初期化一組鍵值配對
|
||||
/// - Parameters:
|
||||
/// - key: 鍵。一般情況下用來放置讀音等可以用來作為索引的內容。
|
||||
/// - value: 資料值。
|
||||
public init(key: String = "", value: String = "") {
|
||||
self.key = key
|
||||
self.value = value
|
||||
// paired = "(" + key + "," + value + ")"
|
||||
}
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(key)
|
||||
hasher.combine(value)
|
||||
// hasher.combine(paired)
|
||||
}
|
||||
|
||||
public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
|
||||
|
@ -60,13 +68,5 @@ extension Megrez {
|
|||
/// Returns whether `lhs` orders at or after `rhs`: a longer key wins outright,
/// and keys of equal length are decided by comparing the values.
public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
  // The key-length test must be strict. With `>=` here, equal-length keys already
  // satisfied the first clause, so the value tie-break could never change the result.
  (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value)
}
|
||||
|
||||
public var description: String {
|
||||
"(\(key), \(value))"
|
||||
}
|
||||
|
||||
public var debugDescription: String {
|
||||
"KeyValuePair(key: \(key), value: \(value))"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; };
|
||||
5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; };
|
||||
5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */; };
|
||||
5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */; };
|
||||
5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */; };
|
||||
5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */; };
|
||||
5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */; };
|
||||
|
@ -300,7 +299,6 @@
|
|||
6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
|
||||
6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_Span.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
|
||||
6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 6_Unigram.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
|
||||
6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_Walker.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
|
||||
6A15B32421A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
|
||||
6A15B32521A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
|
||||
6A225A1E23679F2600F685C6 /* NotarizedArchives */ = {isa = PBXFileReference; lastKnownFileType = folder; path = NotarizedArchives; sourceTree = "<group>"; };
|
||||
|
@ -769,7 +767,6 @@
|
|||
children = (
|
||||
6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */,
|
||||
6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */,
|
||||
6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */,
|
||||
6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */,
|
||||
6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */,
|
||||
6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */,
|
||||
|
@ -1086,7 +1083,6 @@
|
|||
5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */,
|
||||
5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */,
|
||||
5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */,
|
||||
5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */,
|
||||
5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */,
|
||||
5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */,
|
||||
5BAA8FBE282CAF380066C406 /* SyllableComposer.swift in Sources */,
|
||||
|
|
Loading…
Reference in New Issue