1.8.0 SP2 // Megrez fix. Merge Gitee PR!62 from upd/1.8.0sp2
This commit is contained in:
commit
21e4aaff85
|
@ -229,7 +229,7 @@ class KeyHandler {
|
|||
if arrAnchors.isEmpty { return .init() }
|
||||
|
||||
// 讓更長的節錨排序靠前。
|
||||
arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength }
|
||||
arrAnchors = arrAnchors.stableSort { $0.spanLength > $1.spanLength }
|
||||
|
||||
// 將節錨內的候選字詞資料拓印到輸出陣列內。
|
||||
for currentCandidate in arrAnchors.map(\.node.candidates).joined() {
|
||||
|
|
|
@ -185,7 +185,7 @@ extension vChewing {
|
|||
// MARK: - 核心函式(對外)
|
||||
|
||||
/// 威注音輸入法目前尚未具備對雙元圖的處理能力,故停用該函式。
|
||||
// public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { }
|
||||
// public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] { }
|
||||
|
||||
/// 給定讀音字串,讓 LMI 給出對應的經過處理的單元圖陣列。
|
||||
/// - Parameter key: 給定的讀音字串。
|
||||
|
@ -257,7 +257,7 @@ extension vChewing {
|
|||
}
|
||||
|
||||
/// 該函式不起作用,僅用來滿足 LangModelProtocol 協定的要求。
|
||||
public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
|
||||
public func bigramsFor(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
|
||||
|
||||
// MARK: - 核心函式(對內)
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ extension vChewing {
|
|||
/// - parameters:
|
||||
/// - precedingKey: 前述讀音索引鍵
|
||||
/// - key: 當前讀音索引鍵
|
||||
public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
// 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。
|
||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
||||
|
|
|
@ -130,7 +130,7 @@ extension vChewing {
|
|||
/// - parameters:
|
||||
/// - precedingKey: 前述讀音索引鍵
|
||||
/// - key: 當前讀音索引鍵
|
||||
public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
public func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
// 這裡用了點廢話處理,不然函式構建體會被 Swift 格式整理工具給毀掉。
|
||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
||||
|
|
|
@ -26,7 +26,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
extension Megrez {
|
||||
/// 組字器。
|
||||
public class Compositor: Grid {
|
||||
/// 文字輸入方向
|
||||
/// 就文字輸入方向而言的方向。
|
||||
public enum TypingDirection { case front, rear }
|
||||
/// 給被丟掉的節點路徑施加的負權重。
|
||||
private let kDroppedPathScore: Double = -999
|
||||
|
@ -38,7 +38,14 @@ extension Megrez {
|
|||
private var langModel: LangModelProtocol
|
||||
/// 允許查詢當前游標位置屬於第幾個幅位座標(從 0 開始算)。
|
||||
private(set) var cursorRegionMap: [Int: Int] = .init()
|
||||
private(set) var walkedAnchors: [Megrez.NodeAnchor] = [] // 用以記錄爬過的節錨的陣列
|
||||
/// 用以記錄爬過的節錨的陣列。
|
||||
private(set) var walkedAnchors: [NodeAnchor] = []
|
||||
|
||||
/// 該函式用以更新爬過的節錨的陣列。
|
||||
/// - Parameter nodes: 傳入的節點陣列。
|
||||
public func updateWalkedAnchors(with nodes: [Node]) {
|
||||
walkedAnchors = nodes.map { Megrez.NodeAnchor(node: $0) }
|
||||
}
|
||||
|
||||
/// 公開:多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。
|
||||
public var joinSeparator: String = "-"
|
||||
|
@ -47,7 +54,7 @@ extension Megrez {
|
|||
public var length: Int { readings.count }
|
||||
|
||||
/// 按幅位來前後移動游標。
|
||||
/// - Parameter direction: 移動方向
|
||||
/// - Parameter direction: 移動方向。
|
||||
/// - Returns: 該操作是否順利完成。
|
||||
@discardableResult public func jumpCursorBySpan(to direction: TypingDirection) -> Bool {
|
||||
switch direction {
|
||||
|
@ -88,7 +95,7 @@ extension Megrez {
|
|||
/// - separator: 多字讀音鍵當中用以分割漢字讀音的記號,預設為「-」。
|
||||
public init(lm: LangModelProtocol, length: Int = 10, separator: String = "-") {
|
||||
langModel = lm
|
||||
super.init(spanLength: abs(length)) // 防呆
|
||||
super.init(spanLengthLimit: abs(length)) // 防呆
|
||||
joinSeparator = separator
|
||||
}
|
||||
|
||||
|
@ -181,7 +188,7 @@ extension Megrez {
|
|||
|
||||
var paths = [[NodeAnchor]]()
|
||||
let nodes = nodesEndingAt(location: location).stableSorted {
|
||||
$0.scoreForSort > $1.scoreForSort
|
||||
$0.node.score > $1.node.score
|
||||
}
|
||||
|
||||
guard !nodes.isEmpty else { return .init() } // 防止下文出現範圍外索引的錯誤
|
||||
|
@ -270,7 +277,7 @@ extension Megrez {
|
|||
if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue }
|
||||
let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading)
|
||||
if unigrams.isEmpty { continue }
|
||||
let n = Node(key: combinedReading, unigrams: unigrams)
|
||||
let n: Node = .init(key: combinedReading, spanLength: q, unigrams: unigrams)
|
||||
insertNode(node: n, location: p, spanLength: q)
|
||||
}
|
||||
}
|
||||
|
@ -282,6 +289,7 @@ extension Megrez {
|
|||
|
||||
internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) {
|
||||
var cursorRegionMapDict = [Int: Int]()
|
||||
cursorRegionMapDict[-1] = 0 // 防呆
|
||||
var counter = 0
|
||||
for (i, anchor) in anchors.enumerated() {
|
||||
for _ in 0..<anchor.spanLength {
|
||||
|
@ -290,7 +298,6 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
cursorRegionMapDict[counter] = anchors.count
|
||||
cursorRegionMapDict[-1] = 0 // 防呆
|
||||
cursorRegionMap = cursorRegionMapDict
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,8 +41,8 @@ extension Megrez {
|
|||
public var isEmpty: Bool { spans.isEmpty }
|
||||
|
||||
/// 初期化軌格。
|
||||
public init(spanLength: Int = 10) {
|
||||
maxBuildSpanLength = spanLength
|
||||
public init(spanLengthLimit: Int = 10) {
|
||||
maxBuildSpanLength = spanLengthLimit
|
||||
spans = [Megrez.SpanUnit]()
|
||||
}
|
||||
|
||||
|
@ -98,7 +98,7 @@ extension Megrez {
|
|||
spans.remove(at: location)
|
||||
}
|
||||
for i in 0..<location {
|
||||
// zaps overlapping spans
|
||||
// 處理掉被損毀的或者重複的幅位。
|
||||
spans[i].dropNodesBeyond(length: location - i)
|
||||
}
|
||||
}
|
||||
|
@ -114,13 +114,7 @@ extension Megrez {
|
|||
let span = spans[location]
|
||||
for i in 1...maxBuildSpanLength {
|
||||
if let np = span.nodeOf(length: i) {
|
||||
results.append(
|
||||
.init(
|
||||
node: np,
|
||||
location: location,
|
||||
spanLength: i
|
||||
)
|
||||
)
|
||||
results.append(.init(node: np))
|
||||
}
|
||||
}
|
||||
return results // 已證實不會有空節點產生。
|
||||
|
@ -137,13 +131,7 @@ extension Megrez {
|
|||
let span = spans[i]
|
||||
if i + span.maxLength < location { continue }
|
||||
if let np = span.nodeOf(length: location - i) {
|
||||
results.append(
|
||||
.init(
|
||||
node: np,
|
||||
location: i,
|
||||
spanLength: location - i
|
||||
)
|
||||
)
|
||||
results.append(.init(node: np))
|
||||
}
|
||||
}
|
||||
return results // 已證實不會有空節點產生。
|
||||
|
@ -162,13 +150,7 @@ extension Megrez {
|
|||
for j in 1...span.maxLength {
|
||||
if i + j < location { continue }
|
||||
if let np = span.nodeOf(length: j) {
|
||||
results.append(
|
||||
.init(
|
||||
node: np,
|
||||
location: i,
|
||||
spanLength: location - i
|
||||
)
|
||||
)
|
||||
results.append(.init(node: np))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -193,7 +175,7 @@ extension Megrez {
|
|||
@discardableResult public func fixNodeWithCandidateLiteral(_ value: String, at location: Int) -> NodeAnchor {
|
||||
let location = abs(location) // 防呆
|
||||
var node = NodeAnchor()
|
||||
for theAnchor in nodesOverlappedAt(location: location) {
|
||||
for theAnchor in nodesCrossingOrEndingAt(location: location) {
|
||||
let candidates = theAnchor.node.candidates
|
||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||
theAnchor.node.resetCandidate()
|
||||
|
@ -217,7 +199,7 @@ extension Megrez {
|
|||
@discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor {
|
||||
let location = abs(location) // 防呆
|
||||
var node = NodeAnchor()
|
||||
for theAnchor in nodesOverlappedAt(location: location) {
|
||||
for theAnchor in nodesCrossingOrEndingAt(location: location) {
|
||||
let candidates = theAnchor.node.candidates
|
||||
// 將該位置的所有節點的候選字詞鎖定狀態全部重設。
|
||||
theAnchor.node.resetCandidate()
|
||||
|
|
|
@ -30,28 +30,35 @@ extension Megrez {
|
|||
public var isEmpty: Bool { node.key.isEmpty }
|
||||
/// 節點。一個節錨內不一定有節點。
|
||||
public var node: Node = .init()
|
||||
/// 節錨所在的位置。
|
||||
public var location: Int = 0
|
||||
/// 指定的幅位長度。
|
||||
public var spanLength: Int = 0
|
||||
public var spanLength: Int { node.spanLength }
|
||||
/// 獲取用來比較的權重。
|
||||
public var scoreForSort: Double { node.score }
|
||||
/// 累計權重。
|
||||
public var mass: Double = 0.0
|
||||
/// 索引鍵的長度。
|
||||
public var keyLength: Int {
|
||||
isEmpty ? node.key.count : 0
|
||||
/// 單元圖陣列。
|
||||
public var unigrams: [Unigram] { node.unigrams }
|
||||
/// 雙元圖陣列。
|
||||
public var bigrams: [Bigram] { node.bigrams }
|
||||
/// 鍵。
|
||||
public var key: String { node.key }
|
||||
|
||||
/// 初期化一個節錨。
|
||||
public init(node: Node = .init(), mass: Double? = nil) {
|
||||
self.node = node
|
||||
self.mass = mass ?? self.node.score
|
||||
}
|
||||
|
||||
/// 將該節錨雜湊化。
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(node)
|
||||
hasher.combine(location)
|
||||
hasher.combine(spanLength)
|
||||
hasher.combine(mass)
|
||||
}
|
||||
|
||||
/// 將當前節錨列印成一個字串。
|
||||
public var description: String {
|
||||
var stream = ""
|
||||
stream += "{@(" + String(location) + "," + String(spanLength) + "),"
|
||||
stream += "{@(" + String(spanLength) + "),"
|
||||
if node.key.isEmpty {
|
||||
stream += node.description
|
||||
} else {
|
||||
|
@ -60,11 +67,6 @@ extension Megrez {
|
|||
stream += "}"
|
||||
return stream
|
||||
}
|
||||
|
||||
/// 獲取用來比較的權重。
|
||||
public var scoreForSort: Double {
|
||||
isEmpty ? node.score : 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ extension Megrez {
|
|||
lhs.key == rhs.key && lhs.score == rhs.score && lhs.unigrams == rhs.unigrams && lhs.bigrams == rhs.bigrams
|
||||
&& lhs.candidates == rhs.candidates && lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap
|
||||
&& lhs.precedingBigramMap == rhs.precedingBigramMap && lhs.isCandidateFixed == rhs.isCandidateFixed
|
||||
&& lhs.selectedUnigramIndex == rhs.selectedUnigramIndex
|
||||
&& lhs.selectedUnigramIndex == rhs.selectedUnigramIndex && lhs.spanLength == rhs.spanLength
|
||||
}
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
|
@ -38,6 +38,7 @@ extension Megrez {
|
|||
hasher.combine(score)
|
||||
hasher.combine(unigrams)
|
||||
hasher.combine(bigrams)
|
||||
hasher.combine(spanLength)
|
||||
hasher.combine(candidates)
|
||||
hasher.combine(valueUnigramIndexMap)
|
||||
hasher.combine(precedingBigramMap)
|
||||
|
@ -50,9 +51,11 @@ extension Megrez {
|
|||
/// 當前節點的當前被選中的候選字詞「在該節點內的」目前的權重。
|
||||
private(set) var score: Double = 0
|
||||
/// 單元圖陣列。
|
||||
private var unigrams: [Unigram]
|
||||
private(set) var unigrams: [Unigram]
|
||||
/// 雙元圖陣列。
|
||||
private var bigrams: [Bigram]
|
||||
private(set) var bigrams: [Bigram]
|
||||
/// 指定的幅位長度。
|
||||
public var spanLength: Int = 0
|
||||
/// 候選字詞陣列,以鍵值陣列的形式存在。
|
||||
private(set) var candidates: [KeyValuePaired] = []
|
||||
/// 專門「用單元圖資料值來調查索引值」的辭典。
|
||||
|
@ -83,10 +86,11 @@ extension Megrez {
|
|||
/// - key: 索引鍵。
|
||||
/// - unigrams: 單元圖陣列。
|
||||
/// - bigrams: 雙元圖陣列(非必填)。
|
||||
public init(key: String = "", unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
|
||||
public init(key: String = "", spanLength: Int = 0, unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
|
||||
self.key = key
|
||||
self.unigrams = unigrams
|
||||
self.bigrams = bigrams
|
||||
self.spanLength = spanLength
|
||||
|
||||
self.unigrams.sort {
|
||||
$0.score > $1.score
|
||||
|
|
|
@ -28,7 +28,7 @@ public protocol LangModelProtocol {
|
|||
func unigramsFor(key: String) -> [Megrez.Unigram]
|
||||
|
||||
/// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。
|
||||
func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram]
|
||||
func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram]
|
||||
|
||||
/// 給定鍵,確認是否有單元圖記錄在庫。
|
||||
func hasUnigramsFor(key: String) -> Bool
|
||||
|
@ -47,7 +47,7 @@ extension Megrez {
|
|||
}
|
||||
|
||||
/// 給定當前鍵與前述鍵,讓語言模型找給一組雙元圖陣列。
|
||||
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
open func bigramsFor(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue